# retrieval_router.py

import logging
import textwrap
from typing import Any, Literal, Optional
from uuid import UUID

from fastapi import Body, Depends
from fastapi.responses import StreamingResponse

from core.base import (
    GenerationConfig,
    Message,
    R2RException,
    SearchMode,
    SearchSettings,
    select_search_filters,
)
from core.base.api.models import (
    WrappedAgentResponse,
    WrappedCompletionResponse,
    WrappedEmbeddingResponse,
    WrappedLLMChatCompletion,
    WrappedRAGResponse,
    WrappedSearchResponse,
)

from ...abstractions import R2RProviders, R2RServices
from ...config import R2RConfig
from .base_router import BaseRouterV3

logger = logging.getLogger(__name__)


def merge_search_settings(
    base: SearchSettings, overrides: SearchSettings
) -> SearchSettings:
    # Convert both to dicts; for overrides, keep only explicitly set fields
    base_dict = base.model_dump()
    overrides_dict = overrides.model_dump(exclude_unset=True)

    # Update base_dict with values from overrides_dict.
    # This ensures that any field set in overrides takes precedence.
    for k, v in overrides_dict.items():
        base_dict[k] = v

    # Construct a new SearchSettings from the merged dict
    return SearchSettings(**base_dict)
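
# Illustrative usage (comment only; not executed at import time). The field
# names `limit` and `use_hybrid_search` are examples of SearchSettings fields;
# only fields explicitly set on `overrides` take precedence:
#
#     base = SearchSettings(limit=10, use_hybrid_search=False)
#     overrides = SearchSettings(use_hybrid_search=True)
#     merged = merge_search_settings(base, overrides)
#     # merged.limit == 10, merged.use_hybrid_search is True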


class RetrievalRouter(BaseRouterV3):
    def __init__(
        self, providers: R2RProviders, services: R2RServices, config: R2RConfig
    ):
        logging.info("Initializing RetrievalRouter")
        super().__init__(providers, services, config)

    def _register_workflows(self):
        pass

    def _prepare_search_settings(
        self,
        auth_user: Any,
        search_mode: SearchMode,
        search_settings: Optional[SearchSettings],
    ) -> SearchSettings:
        """Prepare the effective search settings based on the provided
        search_mode, optional user overrides in search_settings, and applied
        filters."""
        if search_mode != SearchMode.custom:
            # Start from the mode's defaults
            effective_settings = SearchSettings.get_default(search_mode.value)
            if search_settings:
                # Merge user-provided overrides
                effective_settings = merge_search_settings(
                    effective_settings, search_settings
                )
        else:
            # Custom mode: use provided settings or defaults
            effective_settings = search_settings or SearchSettings()

        # Apply user-specific filters
        effective_settings.filters = select_search_filters(
            auth_user, effective_settings
        )
        return effective_settings
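
    # Sketch of the three-step flow above (illustrative, not executed):
    # mode defaults -> user overrides -> user-scoped filters.
    #
    #     settings = self._prepare_search_settings(
    #         auth_user,
    #         SearchMode.advanced,
    #         SearchSettings(limit=5),  # override only the limit
    #     )
    #     # -> advanced-mode defaults with limit=5, and filters narrowed
    #     #    to what auth_user may see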

    def _setup_routes(self):
        @self.router.post(
            "/retrieval/search",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Search R2R",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # if using auth, do client.login(...)

                            response = client.retrieval.search(
                                query="What is DeepSeek R1?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // if using auth, do client.login(...)

                            const response = await client.retrieval.search({
                                query: "What is DeepSeek R1?",
                            });
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            # Basic search
                            curl -X POST "http://localhost:7272/v3/retrieval/search" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "query": "What is DeepSeek R1?"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def search_app(
            query: str = Body(
                ...,
                description="Search query to find relevant documents",
            ),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description=(
                    "Default value of `custom` allows full control over search settings.\n\n"
                    "Pre-configured search modes:\n"
                    "`basic`: A simple semantic-based search.\n"
                    "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
                    "`custom`: Full control via `search_settings`.\n\n"
                    "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
                    "they will override the default settings for that mode."
                ),
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description=(
                    "The search configuration object. If `search_mode` is `custom`, "
                    "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
                    "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
                ),
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedSearchResponse:
            """Perform a search query against vector and/or graph-based
            databases.

            **Search Modes:**

            - `basic`: Defaults to semantic search. Simple and easy to use.
            - `advanced`: Combines semantic search with full-text search for more comprehensive results.
            - `custom`: Complete control over how search is performed. Provide a full `SearchSettings` object.

            **Filters:**

            Apply filters directly inside `search_settings.filters`. For example:

            ```json
            {
                "filters": {"document_id": {"$eq": "e43864f5-a36f-548e-aacd-6f8d48b30c7f"}}
            }
            ```

            Supported operators: `$eq`, `$neq`, `$gt`, `$gte`, `$lt`, `$lte`, `$like`, `$ilike`, `$in`, `$nin`.

            **Hybrid Search:**

            Enable hybrid search by setting `use_hybrid_search: true` in `search_settings`. This combines
            semantic search with keyword-based search for improved results. Configure with `hybrid_settings`:

            ```json
            {
                "use_hybrid_search": true,
                "hybrid_settings": {
                    "full_text_weight": 1.0,
                    "semantic_weight": 5.0,
                    "full_text_limit": 200,
                    "rrf_k": 50
                }
            }
            ```

            **Graph-Enhanced Search:**

            Knowledge graph integration is enabled by default. Control it with `graph_search_settings`:

            ```json
            {
                "graph_search_settings": {
                    "use_graph_search": true,
                    "kg_search_type": "local"
                }
            }
            ```

            **Advanced Filtering:**

            Use complex filters to narrow down results by metadata fields or document properties:

            ```json
            {
                "filters": {
                    "$and": [
                        {"document_type": {"$eq": "pdf"}},
                        {"metadata.year": {"$gt": 2020}}
                    ]
                }
            }
            ```

            **Results:**

            The response includes vector search results and optional graph search results.
            Each result contains the matched text, document ID, and relevance score.
            """
            if not query:
                raise R2RException("Query cannot be empty", 400)
            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )
            results = await self.services.retrieval.search(
                query=query,
                search_settings=effective_settings,
            )
            return results  # type: ignore
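
        # Illustrative only: a single request combining the options documented
        # in the docstring above (values are examples, not defaults):
        #
        #     client.retrieval.search(
        #         query="What is DeepSeek R1?",
        #         search_settings={
        #             "use_hybrid_search": True,
        #             "hybrid_settings": {"full_text_weight": 1.0, "semantic_weight": 5.0},
        #             "filters": {"metadata.year": {"$gt": 2020}},
        #             "limit": 25,
        #         },
        #     )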

        @self.router.post(
            "/retrieval/rag",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="RAG Query",
            response_model=None,
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            # Basic RAG request
                            response = client.retrieval.rag(
                                query="What is DeepSeek R1?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            // Basic RAG request
                            const response = await client.retrieval.rag({
                                query: "What is DeepSeek R1?",
                            });
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            # Basic RAG request
                            curl -X POST "http://localhost:7272/v3/retrieval/rag" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "query": "What is DeepSeek R1?"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def rag_app(
            query: str = Body(...),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description=(
                    "Default value of `custom` allows full control over search settings.\n\n"
                    "Pre-configured search modes:\n"
                    "`basic`: A simple semantic-based search.\n"
                    "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
                    "`custom`: Full control via `search_settings`.\n\n"
                    "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
                    "they will override the default settings for that mode."
                ),
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description=(
                    "The search configuration object. If `search_mode` is `custom`, "
                    "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
                    "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
                ),
            ),
            rag_generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for RAG generation",
            ),
            task_prompt: Optional[str] = Body(
                default=None,
                description="Optional custom prompt to override the default",
            ),
            include_title_if_available: bool = Body(
                default=False,
                description="Include document titles in responses when available",
            ),
            include_web_search: bool = Body(
                default=False,
                description="Include web search results in the context provided to the LLM.",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedRAGResponse:
            """Execute a RAG (Retrieval-Augmented Generation) query.

            This endpoint combines search results with language model generation to produce accurate,
            contextually relevant responses based on your document corpus.

            **Features:**

            - Combines vector search, optional knowledge graph integration, and LLM generation
            - Automatically cites sources with unique citation identifiers
            - Supports both streaming and non-streaming responses
            - Compatible with various LLM providers (OpenAI, Anthropic, etc.)
            - Web search integration for up-to-date information

            **Search Configuration:**

            All search parameters from the search endpoint apply here, including filters, hybrid search, and graph-enhanced search.

            **Generation Configuration:**

            Fine-tune the language model's behavior with `rag_generation_config`:

            ```json
            {
                "model": "openai/gpt-4.1-mini", // Model to use
                "temperature": 0.7,             // Control randomness (0-1)
                "max_tokens": 1500,             // Maximum output length
                "stream": true                  // Enable token streaming
            }
            ```

            **Model Support:**

            - OpenAI models (default)
            - Anthropic Claude models (requires ANTHROPIC_API_KEY)
            - Local models via Ollama
            - Any provider supported by LiteLLM

            **Streaming Responses:**

            When `stream: true` is set, the endpoint returns Server-Sent Events with the following types:

            - `search_results`: Initial search results from your documents
            - `message`: Partial tokens as they're generated
            - `citation`: Citation metadata when sources are referenced
            - `final_answer`: Complete answer with structured citations

            **Example Response:**

            ```json
            {
                "generated_answer": "DeepSeek-R1 is a model that demonstrates impressive performance...[1]",
                "search_results": { ... },
                "citations": [
                    {
                        "id": "cit.123456",
                        "object": "citation",
                        "payload": { ... }
                    }
                ]
            }
            ```
            """
            if "model" not in rag_generation_config.model_fields_set:
                rag_generation_config.model = self.config.app.quality_llm

            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )

            response = await self.services.retrieval.rag(
                query=query,
                search_settings=effective_settings,
                rag_generation_config=rag_generation_config,
                task_prompt=task_prompt,
                include_title_if_available=include_title_if_available,
                include_web_search=include_web_search,
            )

            if rag_generation_config.stream:
                # ========== Streaming path ==========
                async def stream_generator():
                    try:
                        async for chunk in response:
                            # Re-slice large SSE payloads into <=1 KB pieces
                            # so no single write grows unbounded
                            if len(chunk) > 1024:
                                for i in range(0, len(chunk), 1024):
                                    yield chunk[i : i + 1024]
                            else:
                                yield chunk
                    except GeneratorExit:
                        # Client disconnected; clean up if needed, then return
                        return

                return StreamingResponse(
                    stream_generator(), media_type="text/event-stream"
                )  # type: ignore
            else:
                return response
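
        # Sketch (not part of this router): consuming the SSE stream described
        # above with httpx, assuming a server on localhost:7272 without auth:
        #
        #     import httpx
        #
        #     payload = {
        #         "query": "What is DeepSeek R1?",
        #         "rag_generation_config": {"stream": True},
        #     }
        #     with httpx.stream(
        #         "POST", "http://localhost:7272/v3/retrieval/rag", json=payload
        #     ) as r:
        #         for line in r.iter_lines():
        #             if line.startswith("data: "):
        #                 print(line)  # search_results / message / citation / final_answer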

        @self.router.post(
            "/retrieval/agent",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="RAG-powered Conversational Agent",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import (
                                R2RClient,
                                ThinkingEvent,
                                ToolCallEvent,
                                ToolResultEvent,
                                CitationEvent,
                                FinalAnswerEvent,
                                MessageEvent,
                            )

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            # Basic synchronous request
                            response = client.retrieval.agent(
                                message={
                                    "role": "user",
                                    "content": "Do a deep analysis of the philosophical implications of DeepSeek R1"
                                },
                                rag_tools=["web_search", "web_scrape", "search_file_descriptions", "search_file_knowledge", "get_file_content"],
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            async function main() {
                                // Basic synchronous request
                                const ragResponse = await client.retrieval.agent({
                                    message: {
                                        role: "user",
                                        content: "Do a deep analysis of the philosophical implications of DeepSeek R1"
                                    },
                                    ragTools: ["web_search", "web_scrape", "search_file_descriptions", "search_file_knowledge", "get_file_content"]
                                });
                            }

                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            # Basic request
                            curl -X POST "http://localhost:7272/v3/retrieval/agent" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "message": {
                                        "role": "user",
                                        "content": "What were the key contributions of Aristotle to logic?"
                                    },
                                    "search_settings": {
                                        "use_semantic_search": true,
                                        "filters": {"document_id": {"$eq": "e43864f5-a36f-548e-aacd-6f8d48b30c7f"}}
                                    },
                                    "rag_tools": ["search_file_knowledge", "get_file_content", "web_search"]
                                }'

                            # Advanced analysis with extended thinking
                            curl -X POST "http://localhost:7272/v3/retrieval/agent" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "message": {
                                        "role": "user",
                                        "content": "Do a deep analysis of the philosophical implications of DeepSeek R1"
                                    },
                                    "search_settings": {"limit": 20},
                                    "research_tools": ["rag", "reasoning", "critique", "python_executor"],
                                    "rag_generation_config": {
                                        "model": "anthropic/claude-3-7-sonnet-20250219",
                                        "extended_thinking": true,
                                        "thinking_budget": 4096,
                                        "temperature": 1,
                                        "top_p": null,
                                        "max_tokens": 16000,
                                        "stream": false
                                    }
                                }'

                            # Conversation continuation
                            curl -X POST "http://localhost:7272/v3/retrieval/agent" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "message": {
                                        "role": "user",
                                        "content": "How does it compare to other reasoning models?"
                                    },
                                    "conversation_id": "YOUR_CONVERSATION_ID"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def agent_app(
            message: Optional[Message] = Body(
                None,
                description="Current message to process",
            ),
            messages: Optional[list[Message]] = Body(
                None,
                deprecated=True,
                description="List of messages (deprecated; use `message` instead)",
            ),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description="Pre-configured search modes: basic, advanced, or custom.",
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description="The search configuration object for retrieving context.",
            ),
            # Generation configurations
            rag_generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for RAG generation in 'rag' mode",
            ),
            research_generation_config: Optional[GenerationConfig] = Body(
                None,
                description="Configuration for generation in 'research' mode. If not provided but mode='research', rag_generation_config will be used with appropriate model overrides.",
            ),
            # Tool configurations
            # FIXME: We need a more generic way to handle this
            rag_tools: Optional[
                list[
                    Literal[
                        "web_search",
                        "web_scrape",
                        "search_file_descriptions",
                        "search_file_knowledge",
                        "get_file_content",
                    ]
                ]
            ] = Body(
                None,
                description="List of tools to enable for RAG mode. Available tools: search_file_knowledge, get_file_content, web_search, web_scrape, search_file_descriptions",
            ),
            # FIXME: We need a more generic way to handle this
            research_tools: Optional[
                list[Literal["rag", "reasoning", "critique", "python_executor"]]
            ] = Body(
                None,
                description="List of tools to enable for Research mode. Available tools: rag, reasoning, critique, python_executor",
            ),
            # Backward compatibility
            task_prompt: Optional[str] = Body(
                default=None,
                description="Optional custom prompt to override the default",
            ),
            # Backward compatibility
            include_title_if_available: bool = Body(
                default=True,
                description="Pass document titles from search results into the LLM context window.",
            ),
            conversation_id: Optional[UUID] = Body(
                default=None,
                description="ID of the conversation",
            ),
            max_tool_context_length: Optional[int] = Body(
                default=32_768,
                description="Maximum length of returned tool context",
            ),
            use_system_context: Optional[bool] = Body(
                default=True,
                description="Use the extended prompt for generation",
            ),
            # FIXME: We need a more generic way to handle this
            mode: Optional[Literal["rag", "research"]] = Body(
                default="rag",
                description="Mode to use for generation: 'rag' for standard retrieval or 'research' for deep analysis with reasoning capabilities",
            ),
            needs_initial_conversation_name: Optional[bool] = Body(
                default=None,
                description="If true, the system will automatically assign a conversation name if one has not already been set.",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedAgentResponse:
            """
            Engage with an intelligent agent for information retrieval, analysis, and research.

            This endpoint offers two operating modes:

            - **RAG mode**: Standard retrieval-augmented generation for answering questions based on your knowledge base
            - **Research mode**: Advanced capabilities for deep analysis, reasoning, and computation

            ### RAG Mode (Default)

            The RAG mode provides fast, knowledge-based responses using:

            - Semantic and hybrid search capabilities
            - Document-level and chunk-level content retrieval
            - Optional web search integration
            - Source citation and evidence-based responses

            ### Research Mode

            The Research mode builds on RAG capabilities and adds:

            - A dedicated reasoning system for complex problem-solving
            - Critique capabilities to identify potential biases or logical fallacies
            - Python execution for computational analysis
            - Multi-step reasoning for deeper exploration of topics

            ### Available Tools

            **RAG Tools:**

            - `search_file_knowledge`: Semantic/hybrid search on your ingested documents
            - `search_file_descriptions`: Search over file-level metadata
            - `get_file_content`: Fetch entire documents or chunk structures
            - `web_search`: Query external search APIs for up-to-date information
            - `web_scrape`: Scrape and extract content from specific web pages

            **Research Tools:**

            - `rag`: Leverage the underlying RAG agent for information retrieval
            - `reasoning`: Call a dedicated model for complex analytical thinking
            - `critique`: Analyze conversation history to identify flaws and biases
            - `python_executor`: Execute Python code for complex calculations and analysis

            ### Streaming Output

            When streaming is enabled, the agent produces different event types:

            - `thinking`: Shows the model's step-by-step reasoning (when extended_thinking=true)
            - `tool_call`: Shows when the agent invokes a tool
            - `tool_result`: Shows the result of a tool call
            - `citation`: Indicates when a citation is added to the response
            - `message`: Streams partial tokens of the response
            - `final_answer`: Contains the complete generated answer and structured citations

            ### Conversations

            Maintain context across multiple turns by including `conversation_id` in each request.
            After your first call, store the returned `conversation_id` and include it in subsequent calls.
            If no conversation name has already been set for the conversation, the system will automatically assign one.
            """
            # Handle model selection based on mode
            if "model" not in rag_generation_config.model_fields_set:
                if mode == "rag":
                    rag_generation_config.model = self.config.app.quality_llm
                elif mode == "research":
                    rag_generation_config.model = self.config.app.planning_llm

            # Prepare search settings
            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )

            # Determine effective generation config
            effective_generation_config = rag_generation_config
            if mode == "research" and research_generation_config:
                effective_generation_config = research_generation_config

            try:
                response = await self.services.retrieval.agent(
                    message=message,
                    messages=messages,
                    search_settings=effective_settings,
                    rag_generation_config=rag_generation_config,
                    research_generation_config=research_generation_config,
                    task_prompt=task_prompt,
                    include_title_if_available=include_title_if_available,
                    max_tool_context_length=max_tool_context_length or 32_768,
                    conversation_id=(
                        str(conversation_id) if conversation_id else None  # type: ignore
                    ),
                    use_system_context=use_system_context
                    if use_system_context is not None
                    else True,
                    rag_tools=rag_tools,  # type: ignore
                    research_tools=research_tools,  # type: ignore
                    mode=mode,
                    needs_initial_conversation_name=needs_initial_conversation_name,
                )

                if effective_generation_config.stream:

                    async def stream_generator():
                        try:
                            async for chunk in response:
                                # Re-slice large SSE payloads into <=1 KB pieces
                                if len(chunk) > 1024:
                                    for i in range(0, len(chunk), 1024):
                                        yield chunk[i : i + 1024]
                                else:
                                    yield chunk
                        except GeneratorExit:
                            # Client disconnected; clean up if needed, then return
                            return

                    return StreamingResponse(  # type: ignore
                        stream_generator(), media_type="text/event-stream"
                    )
                else:
                    return response
            except Exception as e:
                logger.error(f"Error in agent_app: {e}")
                raise R2RException(str(e), 500) from e
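
        # Sketch of the multi-turn flow described in the docstring (illustrative;
        # the exact response field holding the conversation id is assumed here):
        #
        #     first = client.retrieval.agent(
        #         message={"role": "user", "content": "What is DeepSeek R1?"},
        #     )
        #     conv_id = first.results.conversation_id  # assumed field name
        #     followup = client.retrieval.agent(
        #         message={"role": "user", "content": "How does it compare to other models?"},
        #         conversation_id=conv_id,
        #     )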

        @self.router.post(
            "/retrieval/completion",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Generate Message Completions",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            response = client.completion(
                                messages=[
                                    {"role": "system", "content": "You are a helpful assistant."},
                                    {"role": "user", "content": "What is the capital of France?"},
                                    {"role": "assistant", "content": "The capital of France is Paris."},
                                    {"role": "user", "content": "What about Italy?"}
                                ],
                                generation_config={
                                    "model": "openai/gpt-4.1-mini",
                                    "temperature": 0.7,
                                    "max_tokens": 150,
                                    "stream": False
                                }
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            async function main() {
                                const response = await client.completion({
                                    messages: [
                                        { role: "system", content: "You are a helpful assistant." },
                                        { role: "user", content: "What is the capital of France?" },
                                        { role: "assistant", content: "The capital of France is Paris." },
                                        { role: "user", content: "What about Italy?" }
                                    ],
                                    generationConfig: {
                                        model: "openai/gpt-4.1-mini",
                                        temperature: 0.7,
                                        maxTokens: 150,
                                        stream: false
                                    }
                                });
                            }

                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "http://localhost:7272/v3/retrieval/completion" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "messages": [
                                        {"role": "system", "content": "You are a helpful assistant."},
                                        {"role": "user", "content": "What is the capital of France?"},
                                        {"role": "assistant", "content": "The capital of France is Paris."},
                                        {"role": "user", "content": "What about Italy?"}
                                    ],
                                    "generation_config": {
                                        "model": "openai/gpt-4.1-mini",
                                        "temperature": 0.7,
                                        "max_tokens": 150,
                                        "stream": false
                                    }
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def completion(
            messages: list[Message] = Body(
                ...,
                description="List of messages to generate completion for",
                example=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant.",
                    },
                    {
                        "role": "user",
                        "content": "What is the capital of France?",
                    },
                    {
                        "role": "assistant",
                        "content": "The capital of France is Paris.",
                    },
                    {"role": "user", "content": "What about Italy?"},
                ],
            ),
            generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for text generation",
                example={
                    "model": "openai/gpt-4.1-mini",
                    "temperature": 0.7,
                    "max_tokens": 150,
                    "stream": False,
                },
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
            response_model=WrappedCompletionResponse,
        ) -> WrappedLLMChatCompletion:
            """Generate completions for a list of messages.

            This endpoint uses the language model to generate completions for
            the provided messages. The generation process can be customized
            using the generation_config parameter.

            The messages list should contain alternating user and assistant
            messages, with an optional system message at the start. Each
            message should have a 'role' and 'content'.
            """
            return await self.services.retrieval.completion(
                messages=messages,  # type: ignore
                generation_config=generation_config,
            )

        @self.router.post(
            "/retrieval/embedding",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Generate Embeddings",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            result = client.retrieval.embedding(
                                text="What is DeepSeek R1?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            async function main() {
                                const response = await client.retrieval.embedding({
                                    text: "What is DeepSeek R1?",
                                });
                            }

                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "http://localhost:7272/v3/retrieval/embedding" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "text": "What is DeepSeek R1?"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def embedding(
            text: str = Body(
                ...,
                description="Text to generate embeddings for",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedEmbeddingResponse:
            """Generate embeddings for the provided text.

            This endpoint uses the service's configured embedding model to
            generate embeddings for the provided text.
            """
            return await self.services.retrieval.embedding(
                text=text,
            )