# retrieval_router.py
  1. import textwrap
  2. from typing import Any, Optional
  3. from uuid import UUID
  4. from fastapi import Body, Depends
  5. from fastapi.responses import StreamingResponse
  6. from core.base import (
  7. GenerationConfig,
  8. Message,
  9. R2RException,
  10. SearchMode,
  11. SearchSettings,
  12. select_search_filters,
  13. )
  14. from core.base.api.models import (
  15. WrappedAgentResponse,
  16. WrappedCompletionResponse,
  17. WrappedRAGResponse,
  18. WrappedSearchResponse,
  19. )
  20. from ...abstractions import R2RProviders, R2RServices
  21. from .base_router import BaseRouterV3
  22. def merge_search_settings(
  23. base: SearchSettings, overrides: SearchSettings
  24. ) -> SearchSettings:
  25. # Convert both to dict
  26. base_dict = base.model_dump()
  27. overrides_dict = overrides.model_dump(exclude_unset=True)
  28. # Update base_dict with values from overrides_dict
  29. # This ensures that any field set in overrides takes precedence
  30. for k, v in overrides_dict.items():
  31. base_dict[k] = v
  32. # Construct a new SearchSettings from the merged dict
  33. return SearchSettings(**base_dict)
class RetrievalRouterV3(BaseRouterV3):
    """Router exposing the /retrieval endpoints: search, RAG, agent,
    completion, and embedding."""

    def __init__(
        self,
        providers: R2RProviders,
        services: R2RServices,
    ):
        # The base router stores providers/services and triggers route setup.
        super().__init__(providers, services)

    def _register_workflows(self):
        # Retrieval endpoints are request/response only; there are no
        # background workflows to register for this router.
        pass
  43. def _prepare_search_settings(
  44. self,
  45. auth_user: Any,
  46. search_mode: SearchMode,
  47. search_settings: Optional[SearchSettings],
  48. ) -> SearchSettings:
  49. """
  50. Prepare the effective search settings based on the provided search_mode,
  51. optional user-overrides in search_settings, and applied filters.
  52. """
  53. if search_mode != SearchMode.custom:
  54. # Start from mode defaults
  55. effective_settings = SearchSettings.get_default(search_mode.value)
  56. if search_settings:
  57. # Merge user-provided overrides
  58. effective_settings = merge_search_settings(
  59. effective_settings, search_settings
  60. )
  61. else:
  62. # Custom mode: use provided settings or defaults
  63. effective_settings = search_settings or SearchSettings()
  64. # Apply user-specific filters
  65. effective_settings.filters = select_search_filters(
  66. auth_user, effective_settings
  67. )
  68. return effective_settings
    def _setup_routes(self):
        """Register all /retrieval endpoints on this router."""

        @self.router.post(
            "/retrieval/search",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Search R2R",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient
                            client = R2RClient()
                            # if using auth, do client.login(...)
                            # Basic mode, no overrides
                            response = client.retrieval.search(
                                query="Who is Aristotle?",
                                search_mode="basic"
                            )
                            # Advanced mode with overrides
                            response = client.retrieval.search(
                                query="Who is Aristotle?",
                                search_mode="advanced",
                                search_settings={
                                    "filters": {"document_id": {"$eq": "3e157b3a-..."}},
                                    "limit": 5
                                }
                            )
                            # Custom mode with full control
                            response = client.retrieval.search(
                                query="Who is Aristotle?",
                                search_mode="custom",
                                search_settings={
                                    "use_semantic_search": True,
                                    "filters": {"category": {"$like": "%philosophy%"}},
                                    "limit": 20,
                                    "chunk_settings": {"limit": 20},
                                    "graph_settings": {"enabled": True}
                                }
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");
                            const client = new r2rClient();
                            function main() {
                                const response = await client.search({
                                    query: "Who is Aristotle?",
                                    search_settings: {
                                        filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
                                        useSemanticSearch: true,
                                        chunkSettings: {
                                            limit: 20, # separate limit for chunk vs. graph
                                            enabled: true
                                        },
                                        graphSettings: {
                                            enabled: true,
                                        }
                                    }
                                });
                            }
                            main();
                            """
                        ),
                    },
                    {
                        "lang": "CLI",
                        "source": textwrap.dedent(
                            """
                            r2r retrieval search --query "Who is Aristotle?"
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "https://api.example.com/retrieval/search" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "query": "Who is Aristotle?",
                                    "search_settings": {
                                        filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
                                        use_semantic_search: true,
                                        chunk_settings: {
                                            limit: 20, # separate limit for chunk vs. graph
                                            enabled: true
                                        },
                                        graph_settings: {
                                            enabled: true,
                                        }
                                    }
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def search_app(
            query: str = Body(
                ...,
                description="Search query to find relevant documents",
            ),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description=(
                    "Default value of `custom` allows full control over search settings.\n\n"
                    "Pre-configured search modes:\n"
                    "`basic`: A simple semantic-based search.\n"
                    "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
                    "`custom`: Full control via `search_settings`.\n\n"
                    "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
                    "they will override the default settings for that mode."
                ),
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description=(
                    "The search configuration object. If `search_mode` is `custom`, "
                    "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
                    "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
                ),
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedSearchResponse:
            """
            Perform a search query against vector and/or graph-based databases.
            **Search Modes:**
            - `basic`: Defaults to semantic search. Simple and easy to use.
            - `advanced`: Combines semantic search with full-text search for more comprehensive results.
            - `custom`: Complete control over how search is performed. Provide a full `SearchSettings` object.
            **Filters:**
            Apply filters directly inside `search_settings.filters`. For example:
            ```json
            {
                "filters": {"document_id": {"$eq": "3e157b3a-..."}}
            }
            ```
            Supported operators: `$eq`, `$neq`, `$gt`, `$gte`, `$lt`, `$lte`, `$like`, `$ilike`, `$in`, `$nin`.
            **Limit:**
            Control how many results you get by specifying `limit` inside `search_settings`. For example:
            ```json
            {
                "limit": 20
            }
            ```
            **Examples:**
            - Using `basic` mode and no overrides:
                Just specify `search_mode="basic"`.
            - Using `advanced` mode and applying a filter:
                Specify `search_mode="advanced"` and include `search_settings={"filters": {...}, "limit": 5}` to override defaults.
            - Using `custom` mode:
                Provide the entire `search_settings` to define your search exactly as you want it.
            """
            # Reject empty queries up front instead of running a pointless search.
            if query == "":
                raise R2RException("Query cannot be empty", 400)
            # Resolve mode defaults + caller overrides + per-user access filters.
            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )
            results = await self.services.retrieval.search(
                query=query,
                search_settings=effective_settings,
            )
            return results
        @self.router.post(
            "/retrieval/rag",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="RAG Query",
            # response_model=None because a streaming request returns a
            # StreamingResponse rather than the documented WrappedRAGResponse.
            response_model=None,
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient
                            client = R2RClient()
                            # when using auth, do client.login(...)
                            response =client.retrieval.rag(
                                query="Who is Aristotle?",
                                search_settings={
                                    "use_semantic_search": True,
                                    "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
                                    "limit": 10,
                                    chunk_settings={
                                        "limit": 20, # separate limit for chunk vs. graph
                                    },
                                    graph_settings={
                                        "enabled": True,
                                    },
                                },
                                rag_generation_config: {
                                    stream: false,
                                    temperature: 0.7,
                                    max_tokens: 150
                                }
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");
                            const client = new r2rClient();
                            function main() {
                                const response = await client.retrieval.rag({
                                    query: "Who is Aristotle?",
                                    search_settings: {
                                        filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
                                        useSemanticSearch: true,
                                        chunkSettings: {
                                            limit: 20, # separate limit for chunk vs. graph
                                            enabled: true
                                        },
                                        graphSettings: {
                                            enabled: true,
                                        },
                                    },
                                    ragGenerationConfig: {
                                        stream: false,
                                        temperature: 0.7,
                                        maxTokens: 150
                                    }
                                });
                            }
                            main();
                            """
                        ),
                    },
                    {
                        "lang": "CLI",
                        "source": textwrap.dedent(
                            """
                            r2r retrieval search --query "Who is Aristotle?" --stream
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "https://api.example.com/retrieval/rag" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "query": "Who is Aristotle?",
                                    "search_settings": {
                                        "use_semantic_search": True,
                                        "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
                                        "limit": 10,
                                        chunk_settings={
                                            "limit": 20, # separate limit for chunk vs. graph
                                        },
                                        graph_settings={
                                            "enabled": True,
                                        },
                                    },
                                    "rag_generation_config": {
                                        stream: false,
                                        temperature: 0.7,
                                        max_tokens: 150
                                    }
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def rag_app(
            query: str = Body(...),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description=(
                    "Default value of `custom` allows full control over search settings.\n\n"
                    "Pre-configured search modes:\n"
                    "`basic`: A simple semantic-based search.\n"
                    "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
                    "`custom`: Full control via `search_settings`.\n\n"
                    "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
                    "they will override the default settings for that mode."
                ),
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description=(
                    "The search configuration object. If `search_mode` is `custom`, "
                    "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
                    "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
                ),
            ),
            rag_generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for RAG generation",
            ),
            task_prompt_override: Optional[str] = Body(
                default=None,
                description="Optional custom prompt to override default",
            ),
            include_title_if_available: bool = Body(
                default=False,
                description="Include document titles in responses when available",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedRAGResponse:
            """
            Execute a RAG (Retrieval-Augmented Generation) query.
            This endpoint combines search results with language model generation.
            It supports the same filtering capabilities as the search endpoint,
            allowing for precise control over the retrieved context.
            The generation process can be customized using the `rag_generation_config` parameter.
            """
            # Resolve mode defaults + caller overrides + per-user access filters.
            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )
            # When streaming, `response` is an async generator; otherwise it is
            # the full RAG result object.
            response = await self.services.retrieval.rag(
                query=query,
                search_settings=effective_settings,
                rag_generation_config=rag_generation_config,
                task_prompt_override=task_prompt_override,
                include_title_if_available=include_title_if_available,
            )
            if rag_generation_config.stream:
                # Wrap the generator so a client disconnect (GeneratorExit)
                # ends the stream quietly instead of propagating.
                async def stream_generator():
                    try:
                        async for chunk in response:
                            yield chunk
                    except GeneratorExit:
                        # Clean up if needed, then return
                        return
                return StreamingResponse(
                    stream_generator(), media_type="application/json"
                )  # type: ignore
            else:
                return response
  414. @self.router.post(
  415. "/retrieval/agent",
  416. dependencies=[Depends(self.rate_limit_dependency)],
  417. summary="RAG-powered Conversational Agent",
  418. openapi_extra={
  419. "x-codeSamples": [
  420. {
  421. "lang": "Python",
  422. "source": textwrap.dedent(
  423. """
  424. from r2r import R2RClient
  425. client = R2RClient()
  426. # when using auth, do client.login(...)
  427. response =client.retrieval.agent(
  428. message={
  429. "role": "user",
  430. "content": "What were the key contributions of Aristotle to logic and how did they influence later philosophers?"
  431. },
  432. search_settings={
  433. "use_semantic_search": True,
  434. "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
  435. "limit": 10,
  436. chunk_settings={
  437. "limit": 20, # separate limit for chunk vs. graph
  438. },
  439. graph_settings={
  440. "enabled": True,
  441. },
  442. },
  443. rag_generation_config: {
  444. stream: false,
  445. temperature: 0.7,
  446. max_tokens: 150
  447. }
  448. include_title_if_available=True,
  449. conversation_id="550e8400-e29b-41d4-a716-446655440000" # Optional for conversation continuity
  450. )
  451. """
  452. ),
  453. },
  454. {
  455. "lang": "JavaScript",
  456. "source": textwrap.dedent(
  457. """
  458. const { r2rClient } = require("r2r-js");
  459. const client = new r2rClient();
  460. function main() {
  461. const response = await client.retrieval.agent({
  462. message: {
  463. role: "user",
  464. content: "What were the key contributions of Aristotle to logic and how did they influence later philosophers?"
  465. },
  466. searchSettings: {
  467. filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
  468. useSemanticSearch: true,
  469. chunkSettings: {
  470. limit: 20, # separate limit for chunk vs. graph
  471. enabled: true
  472. },
  473. graphSettings: {
  474. enabled: true,
  475. },
  476. },
  477. ragGenerationConfig: {
  478. stream: false,
  479. temperature: 0.7,
  480. maxTokens: 150
  481. },
  482. includeTitleIfAvailable: true,
  483. conversationId: "550e8400-e29b-41d4-a716-446655440000"
  484. });
  485. }
  486. main();
  487. """
  488. ),
  489. },
  490. {
  491. "lang": "Shell",
  492. "source": textwrap.dedent(
  493. """
  494. curl -X POST "https://api.example.com/retrieval/agent" \\
  495. -H "Content-Type: application/json" \\
  496. -H "Authorization: Bearer YOUR_API_KEY" \\
  497. -d '{
  498. "message": {
  499. "role": "user",
  500. "content": "What were the key contributions of Aristotle to logic and how did they influence later philosophers?"
  501. },
  502. "search_settings": {
  503. "use_semantic_search": True,
  504. "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
  505. "limit": 10,
  506. chunk_settings={
  507. "limit": 20, # separate limit for chunk vs. graph
  508. },
  509. graph_settings={
  510. "enabled": True,
  511. },
  512. },
  513. "include_title_if_available": true,
  514. "conversation_id": "550e8400-e29b-41d4-a716-446655440000"
  515. }'
  516. """
  517. ),
  518. },
  519. ]
  520. },
  521. )
  522. @self.base_endpoint
  523. async def agent_app(
  524. message: Optional[Message] = Body(
  525. None,
  526. description="Current message to process",
  527. ),
  528. messages: Optional[list[Message]] = Body(
  529. None,
  530. deprecated=True,
  531. description="List of messages (deprecated, use message instead)",
  532. ),
  533. search_mode: SearchMode = Body(
  534. default=SearchMode.custom,
  535. description=(
  536. "Default value of `custom` allows full control over search settings.\n\n"
  537. "Pre-configured search modes:\n"
  538. "`basic`: A simple semantic-based search.\n"
  539. "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
  540. "`custom`: Full control via `search_settings`.\n\n"
  541. "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
  542. "they will override the default settings for that mode."
  543. ),
  544. ),
  545. search_settings: Optional[SearchSettings] = Body(
  546. None,
  547. description=(
  548. "The search configuration object. If `search_mode` is `custom`, "
  549. "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
  550. "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
  551. ),
  552. ),
  553. rag_generation_config: GenerationConfig = Body(
  554. default_factory=GenerationConfig,
  555. description="Configuration for RAG generation",
  556. ),
  557. task_prompt_override: Optional[str] = Body(
  558. default=None,
  559. description="Optional custom prompt to override default",
  560. ),
  561. include_title_if_available: bool = Body(
  562. default=True,
  563. description="Include document titles in responses when available",
  564. ),
  565. conversation_id: Optional[UUID] = Body(
  566. default=None,
  567. description="ID of the conversation",
  568. ),
  569. auth_user=Depends(self.providers.auth.auth_wrapper()),
  570. ) -> WrappedAgentResponse:
  571. """
  572. Engage with an intelligent RAG-powered conversational agent for complex information retrieval and analysis.
  573. This advanced endpoint combines retrieval-augmented generation (RAG) with a conversational AI agent to provide
  574. detailed, context-aware responses based on your document collection. The agent can:
  575. - Maintain conversation context across multiple interactions
  576. - Dynamically search and retrieve relevant information from both vector and knowledge graph sources
  577. - Break down complex queries into sub-questions for comprehensive answers
  578. - Cite sources and provide evidence-based responses
  579. - Handle follow-up questions and clarifications
  580. - Navigate complex topics with multi-step reasoning
  581. Key Features:
  582. - Hybrid search combining vector and knowledge graph approaches
  583. - Contextual conversation management with conversation_id tracking
  584. - Customizable generation parameters for response style and length
  585. - Source document citation with optional title inclusion
  586. - Streaming support for real-time responses
  587. - Branch management for exploring different conversation paths
  588. Common Use Cases:
  589. - Research assistance and literature review
  590. - Document analysis and summarization
  591. - Technical support and troubleshooting
  592. - Educational Q&A and tutoring
  593. - Knowledge base exploration
  594. The agent uses both vector search and knowledge graph capabilities to find and synthesize
  595. information, providing detailed, factual responses with proper attribution to source documents.
  596. """
  597. effective_settings = self._prepare_search_settings(
  598. auth_user, search_mode, search_settings
  599. )
  600. try:
  601. response = await self.services.retrieval.agent(
  602. message=message,
  603. messages=messages,
  604. search_settings=effective_settings,
  605. rag_generation_config=rag_generation_config,
  606. task_prompt_override=task_prompt_override,
  607. include_title_if_available=include_title_if_available,
  608. conversation_id=(
  609. str(conversation_id) if conversation_id else None
  610. ),
  611. )
  612. if rag_generation_config.stream:
  613. async def stream_generator():
  614. try:
  615. async for chunk in response:
  616. yield chunk
  617. except GeneratorExit:
  618. # Clean up if needed, then return
  619. return
  620. return StreamingResponse(
  621. stream_generator(), media_type="application/json"
  622. ) # type: ignore
  623. else:
  624. return response
  625. except Exception as e:
  626. raise R2RException(str(e), 500)
        @self.router.post(
            "/retrieval/completion",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Generate Message Completions",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient
                            client = R2RClient()
                            # when using auth, do client.login(...)
                            response =client.completion(
                                messages=[
                                    {"role": "system", "content": "You are a helpful assistant."},
                                    {"role": "user", "content": "What is the capital of France?"},
                                    {"role": "assistant", "content": "The capital of France is Paris."},
                                    {"role": "user", "content": "What about Italy?"}
                                ],
                                generation_config={
                                    "model": "gpt-4o-mini",
                                    "temperature": 0.7,
                                    "max_tokens": 150,
                                    "stream": False
                                }
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");
                            const client = new r2rClient();
                            function main() {
                                const response = await client.completion({
                                    messages: [
                                        { role: "system", content: "You are a helpful assistant." },
                                        { role: "user", content: "What is the capital of France?" },
                                        { role: "assistant", content: "The capital of France is Paris." },
                                        { role: "user", content: "What about Italy?" }
                                    ],
                                    generationConfig: {
                                        model: "gpt-4o-mini",
                                        temperature: 0.7,
                                        maxTokens: 150,
                                        stream: false
                                    }
                                });
                            }
                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "https://api.example.com/retrieval/completion" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "messages": [
                                        {"role": "system", "content": "You are a helpful assistant."},
                                        {"role": "user", "content": "What is the capital of France?"},
                                        {"role": "assistant", "content": "The capital of France is Paris."},
                                        {"role": "user", "content": "What about Italy?"}
                                    ],
                                    "generation_config": {
                                        "model": "gpt-4o-mini",
                                        "temperature": 0.7,
                                        "max_tokens": 150,
                                        "stream": false
                                    }
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def completion(
            messages: list[Message] = Body(
                ...,
                description="List of messages to generate completion for",
                example=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant.",
                    },
                    {
                        "role": "user",
                        "content": "What is the capital of France?",
                    },
                    {
                        "role": "assistant",
                        "content": "The capital of France is Paris.",
                    },
                    {"role": "user", "content": "What about Italy?"},
                ],
            ),
            generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for text generation",
                example={
                    "model": "gpt-4o-mini",
                    "temperature": 0.7,
                    "max_tokens": 150,
                    "stream": False,
                },
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
            # NOTE(review): `response_model` here is a *parameter* of the
            # endpoint function, not a route option — FastAPI will treat it as
            # request data. It likely belongs in the @router.post(...) call;
            # confirm before changing, as the current form may be relied upon.
            response_model=WrappedCompletionResponse,
        ):
            """
            Generate completions for a list of messages.
            This endpoint uses the language model to generate completions for the provided messages.
            The generation process can be customized using the generation_config parameter.
            The messages list should contain alternating user and assistant messages, with an optional
            system message at the start. Each message should have a 'role' and 'content'.
            """
            return await self.services.retrieval.completion(
                messages=messages,
                generation_config=generation_config,
            )
        @self.router.post(
            "/retrieval/embedding",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Generate Embeddings",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient
                            client = R2RClient()
                            # when using auth, do client.login(...)
                            result = client.retrieval.embedding(
                                text="Who is Aristotle?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");
                            const client = new r2rClient();
                            function main() {
                                const response = await client.retrieval.embedding({
                                    text: "Who is Aristotle?",
                                });
                            }
                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "https://api.example.com/retrieval/embedding" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "text": "Who is Aristotle?",
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def embedding(
            text: str = Body(
                ...,
                description="Text to generate embeddings for",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ):
            """
            Generate embeddings for the provided text using the specified model.
            This endpoint uses the language model to generate embeddings for the provided text.
            The model parameter specifies the model to use for generating embeddings.
            """
            # Thin pass-through to the retrieval service's embedding provider.
            return await self.services.retrieval.embedding(
                text=text,
            )