import logging
import textwrap
from typing import Any, Literal, Optional
from uuid import UUID

from fastapi import Body, Depends
from fastapi.responses import StreamingResponse

from core.base import (
    GenerationConfig,
    Message,
    R2RException,
    SearchMode,
    SearchSettings,
    select_search_filters,
)
from core.base.api.models import (
    WrappedAgentResponse,
    WrappedCompletionResponse,
    WrappedEmbeddingResponse,
    WrappedLLMChatCompletion,
    WrappedRAGResponse,
    WrappedSearchResponse,
)

from ...abstractions import R2RProviders, R2RServices
from ...config import R2RConfig
from .base_router import BaseRouterV3

logger = logging.getLogger(__name__)

def merge_search_settings(
    base: SearchSettings, overrides: SearchSettings
) -> SearchSettings:
    # Convert both to dict
    base_dict = base.model_dump()
    overrides_dict = overrides.model_dump(exclude_unset=True)

    # Update base_dict with values from overrides_dict
    # This ensures that any field set in overrides takes precedence
    for k, v in overrides_dict.items():
        base_dict[k] = v

    # Construct a new SearchSettings from the merged dict
    return SearchSettings(**base_dict)
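
# A minimal usage sketch (hypothetical values; `limit` and
# `use_hybrid_search` are SearchSettings fields referenced elsewhere in
# this module). Because `overrides` is dumped with exclude_unset=True,
# only fields the caller explicitly set take precedence:
#
#   base = SearchSettings(limit=10, use_hybrid_search=False)
#   overrides = SearchSettings(limit=25)      # only `limit` was set
#   merged = merge_search_settings(base, overrides)
#   assert merged.limit == 25                 # override wins
#   assert merged.use_hybrid_search is False  # base value preserved
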
class RetrievalRouter(BaseRouterV3):
    def __init__(
        self, providers: R2RProviders, services: R2RServices, config: R2RConfig
    ):
        logger.info("Initializing RetrievalRouter")
        super().__init__(providers, services, config)

    def _register_workflows(self):
        pass
    def _prepare_search_settings(
        self,
        auth_user: Any,
        search_mode: SearchMode,
        search_settings: Optional[SearchSettings],
    ) -> SearchSettings:
        """Prepare the effective search settings based on the provided
        search_mode, optional user-overrides in search_settings, and applied
        filters."""
        if search_mode != SearchMode.custom:
            # Start from mode defaults
            effective_settings = SearchSettings.get_default(search_mode.value)
            if search_settings:
                # Merge user-provided overrides
                effective_settings = merge_search_settings(
                    effective_settings, search_settings
                )
        else:
            # Custom mode: use provided settings or defaults
            effective_settings = search_settings or SearchSettings()

        # Apply user-specific filters
        effective_settings.filters = select_search_filters(
            auth_user, effective_settings
        )
        return effective_settings
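
    # Illustrative flow (a sketch; assumes an authenticated `auth_user` and
    # that the "advanced" mode defaults enable hybrid search):
    #
    #   settings = self._prepare_search_settings(
    #       auth_user,
    #       SearchMode.advanced,
    #       SearchSettings(limit=5),  # user override merged onto mode defaults
    #   )
    #   # -> advanced defaults, limit=5, plus per-user access filters
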
    def _setup_routes(self):
        @self.router.post(
            "/retrieval/search",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Search R2R",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # if using auth, do client.login(...)

                            response = client.retrieval.search(
                                query="What is DeepSeek R1?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // if using auth, do client.login(...)

                            const response = await client.retrieval.search({
                                query: "What is DeepSeek R1?",
                            });
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            # Basic search
                            curl -X POST "http://localhost:7272/v3/retrieval/search" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "query": "What is DeepSeek R1?"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def search_app(
            query: str = Body(
                ...,
                description="Search query to find relevant documents",
            ),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description=(
                    "Default value of `custom` allows full control over search settings.\n\n"
                    "Pre-configured search modes:\n"
                    "`basic`: A simple semantic-based search.\n"
                    "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
                    "`custom`: Full control via `search_settings`.\n\n"
                    "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
                    "they will override the default settings for that mode."
                ),
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description=(
                    "The search configuration object. If `search_mode` is `custom`, "
                    "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
                    "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
                ),
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedSearchResponse:
            """Perform a search query against vector and/or graph-based
            databases.

            **Search Modes:**

            - `basic`: Defaults to semantic search. Simple and easy to use.
            - `advanced`: Combines semantic search with full-text search for more comprehensive results.
            - `custom`: Complete control over how search is performed. Provide a full `SearchSettings` object.

            **Filters:**

            Apply filters directly inside `search_settings.filters`. For example:

            ```json
            {
                "filters": {"document_id": {"$eq": "e43864f5-a36f-548e-aacd-6f8d48b30c7f"}}
            }
            ```

            Supported operators: `$eq`, `$neq`, `$gt`, `$gte`, `$lt`, `$lte`, `$like`, `$ilike`, `$in`, `$nin`.

            **Hybrid Search:**

            Enable hybrid search by setting `use_hybrid_search: true` in search_settings. This combines semantic search with
            keyword-based search for improved results. Configure with `hybrid_settings`:

            ```json
            {
                "use_hybrid_search": true,
                "hybrid_settings": {
                    "full_text_weight": 1.0,
                    "semantic_weight": 5.0,
                    "full_text_limit": 200,
                    "rrf_k": 50
                }
            }
            ```

            **Graph-Enhanced Search:**

            Knowledge graph integration is enabled by default. Control with `graph_search_settings`:

            ```json
            {
                "graph_search_settings": {
                    "use_graph_search": true,
                    "kg_search_type": "local"
                }
            }
            ```

            **Advanced Filtering:**

            Use complex filters to narrow down results by metadata fields or document properties:

            ```json
            {
                "filters": {
                    "$and": [
                        {"document_type": {"$eq": "pdf"}},
                        {"metadata.year": {"$gt": 2020}}
                    ]
                }
            }
            ```

            **Results:**

            The response includes vector search results and optional graph search results.
            Each result contains the matched text, document ID, and relevance score.
            """
            if not query:
                raise R2RException("Query cannot be empty", 400)

            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )
            results = await self.services.retrieval.search(
                query=query,
                search_settings=effective_settings,
            )
            return results  # type: ignore
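
        # Hedged request sketch for the hybrid-search settings documented in
        # the docstring above (assumes the Python SDK accepts a plain dict
        # for `search_settings`, mirroring the JSON shape shown there):
        #
        #   response = client.retrieval.search(
        #       query="What is DeepSeek R1?",
        #       search_settings={
        #           "use_hybrid_search": True,
        #           "hybrid_settings": {
        #               "full_text_weight": 1.0,
        #               "semantic_weight": 5.0,
        #               "full_text_limit": 200,
        #               "rrf_k": 50,
        #           },
        #           "limit": 10,
        #       },
        #   )
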
        @self.router.post(
            "/retrieval/rag",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="RAG Query",
            response_model=None,
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            # Basic RAG request
                            response = client.retrieval.rag(
                                query="What is DeepSeek R1?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            // Basic RAG request
                            const response = await client.retrieval.rag({
                                query: "What is DeepSeek R1?",
                            });
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            # Basic RAG request
                            curl -X POST "http://localhost:7272/v3/retrieval/rag" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "query": "What is DeepSeek R1?"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def rag_app(
            query: str = Body(...),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description=(
                    "Default value of `custom` allows full control over search settings.\n\n"
                    "Pre-configured search modes:\n"
                    "`basic`: A simple semantic-based search.\n"
                    "`advanced`: A more powerful hybrid search combining semantic and full-text.\n"
                    "`custom`: Full control via `search_settings`.\n\n"
                    "If `filters` or `limit` are provided alongside `basic` or `advanced`, "
                    "they will override the default settings for that mode."
                ),
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description=(
                    "The search configuration object. If `search_mode` is `custom`, "
                    "these settings are used as-is. For `basic` or `advanced`, these settings will override the default mode configuration.\n\n"
                    "Common overrides include `filters` to narrow results and `limit` to control how many results are returned."
                ),
            ),
            rag_generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for RAG generation",
            ),
            task_prompt: Optional[str] = Body(
                default=None,
                description="Optional custom prompt to override default",
            ),
            include_title_if_available: bool = Body(
                default=False,
                description="Include document titles in responses when available",
            ),
            include_web_search: bool = Body(
                default=False,
                description="Include web search results provided to the LLM.",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedRAGResponse:
            """Execute a RAG (Retrieval-Augmented Generation) query.

            This endpoint combines search results with language model generation to produce accurate,
            contextually-relevant responses based on your document corpus.

            **Features:**

            - Combines vector search, optional knowledge graph integration, and LLM generation
            - Automatically cites sources with unique citation identifiers
            - Supports both streaming and non-streaming responses
            - Compatible with various LLM providers (OpenAI, Anthropic, etc.)
            - Web search integration for up-to-date information

            **Search Configuration:**

            All search parameters from the search endpoint apply here, including filters, hybrid search, and graph-enhanced search.

            **Generation Configuration:**

            Fine-tune the language model's behavior with `rag_generation_config`:

            ```json
            {
                "model": "openai/gpt-4.1-mini",  // Model to use
                "temperature": 0.7,              // Control randomness (0-1)
                "max_tokens": 1500,              // Maximum output length
                "stream": true                   // Enable token streaming
            }
            ```

            **Model Support:**

            - OpenAI models (default)
            - Anthropic Claude models (requires ANTHROPIC_API_KEY)
            - Local models via Ollama
            - Any provider supported by LiteLLM

            **Streaming Responses:**

            When `stream: true` is set, the endpoint returns Server-Sent Events with the following types:

            - `search_results`: Initial search results from your documents
            - `message`: Partial tokens as they're generated
            - `citation`: Citation metadata when sources are referenced
            - `final_answer`: Complete answer with structured citations

            **Example Response:**

            ```json
            {
                "generated_answer": "DeepSeek-R1 is a model that demonstrates impressive performance...[1]",
                "search_results": { ... },
                "citations": [
                    {
                        "id": "cit.123456",
                        "object": "citation",
                        "payload": { ... }
                    }
                ]
            }
            ```
            """
            if "model" not in rag_generation_config.model_fields_set:
                rag_generation_config.model = self.config.app.quality_llm

            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )

            response = await self.services.retrieval.rag(
                query=query,
                search_settings=effective_settings,
                rag_generation_config=rag_generation_config,
                task_prompt=task_prompt,
                include_title_if_available=include_title_if_available,
                include_web_search=include_web_search,
            )

            if rag_generation_config.stream:
                # ========== Streaming path ==========
                async def stream_generator():
                    try:
                        async for chunk in response:
                            # Re-chunk large SSE payloads into 1 KiB slices so
                            # no single write grows unbounded.
                            if len(chunk) > 1024:
                                for i in range(0, len(chunk), 1024):
                                    yield chunk[i : i + 1024]
                            else:
                                yield chunk
                    except GeneratorExit:
                        # Client disconnected; clean up if needed, then return.
                        return

                return StreamingResponse(
                    stream_generator(), media_type="text/event-stream"
                )  # type: ignore
            else:
                return response
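
        # Streaming consumption sketch (hedged: assumes the Python SDK yields
        # SSE events when `stream` is true, with the event types listed in
        # the docstring above):
        #
        #   for event in client.retrieval.rag(
        #       query="What is DeepSeek R1?",
        #       rag_generation_config={"stream": True},
        #   ):
        #       # search_results, message, citation, final_answer
        #       print(event)
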
        @self.router.post(
            "/retrieval/agent",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="RAG-powered Conversational Agent",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import (
                                R2RClient,
                                ThinkingEvent,
                                ToolCallEvent,
                                ToolResultEvent,
                                CitationEvent,
                                FinalAnswerEvent,
                                MessageEvent,
                            )

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            # Basic synchronous request
                            response = client.retrieval.agent(
                                message={
                                    "role": "user",
                                    "content": "Do a deep analysis of the philosophical implications of DeepSeek R1"
                                },
                                rag_tools=["web_search", "web_scrape", "search_file_descriptions", "search_file_knowledge", "get_file_content"],
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            async function main() {
                                // Basic synchronous request
                                const ragResponse = await client.retrieval.agent({
                                    message: {
                                        role: "user",
                                        content: "Do a deep analysis of the philosophical implications of DeepSeek R1"
                                    },
                                    ragTools: ["web_search", "web_scrape", "search_file_descriptions", "search_file_knowledge", "get_file_content"]
                                });
                            }

                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            # Basic request
                            curl -X POST "http://localhost:7272/v3/retrieval/agent" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "message": {
                                        "role": "user",
                                        "content": "What were the key contributions of Aristotle to logic?"
                                    },
                                    "search_settings": {
                                        "use_semantic_search": true,
                                        "filters": {"document_id": {"$eq": "e43864f5-a36f-548e-aacd-6f8d48b30c7f"}}
                                    },
                                    "rag_tools": ["search_file_knowledge", "get_file_content", "web_search"]
                                }'

                            # Advanced analysis with extended thinking
                            curl -X POST "http://localhost:7272/v3/retrieval/agent" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "message": {
                                        "role": "user",
                                        "content": "Do a deep analysis of the philosophical implications of DeepSeek R1"
                                    },
                                    "search_settings": {"limit": 20},
                                    "research_tools": ["rag", "reasoning", "critique", "python_executor"],
                                    "rag_generation_config": {
                                        "model": "anthropic/claude-3-7-sonnet-20250219",
                                        "extended_thinking": true,
                                        "thinking_budget": 4096,
                                        "temperature": 1,
                                        "top_p": null,
                                        "max_tokens": 16000,
                                        "stream": false
                                    }
                                }'

                            # Conversation continuation
                            curl -X POST "http://localhost:7272/v3/retrieval/agent" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "message": {
                                        "role": "user",
                                        "content": "How does it compare to other reasoning models?"
                                    },
                                    "conversation_id": "YOUR_CONVERSATION_ID"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def agent_app(
            message: Optional[Message] = Body(
                None,
                description="Current message to process",
            ),
            messages: Optional[list[Message]] = Body(
                None,
                deprecated=True,
                description="List of messages (deprecated, use `message` instead)",
            ),
            search_mode: SearchMode = Body(
                default=SearchMode.custom,
                description="Pre-configured search modes: basic, advanced, or custom.",
            ),
            search_settings: Optional[SearchSettings] = Body(
                None,
                description="The search configuration object for retrieving context.",
            ),
            # Generation configurations
            rag_generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for RAG generation in 'rag' mode",
            ),
            research_generation_config: Optional[GenerationConfig] = Body(
                None,
                description="Configuration for generation in 'research' mode. If not provided but mode='research', rag_generation_config will be used with appropriate model overrides.",
            ),
            # Tool configurations
            # FIXME: We need a more generic way to handle this
            rag_tools: Optional[
                list[
                    Literal[
                        "web_search",
                        "web_scrape",
                        "search_file_descriptions",
                        "search_file_knowledge",
                        "get_file_content",
                    ]
                ]
            ] = Body(
                None,
                description="List of tools to enable for RAG mode. Available tools: search_file_knowledge, get_file_content, web_search, web_scrape, search_file_descriptions",
            ),
            # FIXME: We need a more generic way to handle this
            research_tools: Optional[
                list[Literal["rag", "reasoning", "critique", "python_executor"]]
            ] = Body(
                None,
                description="List of tools to enable for Research mode. Available tools: rag, reasoning, critique, python_executor",
            ),
            # Backward compatibility
            task_prompt: Optional[str] = Body(
                default=None,
                description="Optional custom prompt to override default",
            ),
            # Backward compatibility
            include_title_if_available: bool = Body(
                default=True,
                description="Pass document titles from search results into the LLM context window.",
            ),
            conversation_id: Optional[UUID] = Body(
                default=None,
                description="ID of the conversation",
            ),
            max_tool_context_length: Optional[int] = Body(
                default=32_768,
                description="Maximum length of returned tool context",
            ),
            use_system_context: Optional[bool] = Body(
                default=True,
                description="Use extended prompt for generation",
            ),
            # FIXME: We need a more generic way to handle this
            mode: Optional[Literal["rag", "research"]] = Body(
                default="rag",
                description="Mode to use for generation: 'rag' for standard retrieval or 'research' for deep analysis with reasoning capabilities",
            ),
            needs_initial_conversation_name: Optional[bool] = Body(
                default=None,
                description="If true, the system will automatically assign a conversation name if one has not already been specified.",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedAgentResponse:
            """Engage with an intelligent agent for information retrieval, analysis, and research.

            This endpoint offers two operating modes:

            - **RAG mode**: Standard retrieval-augmented generation for answering questions based on your knowledge base
            - **Research mode**: Advanced capabilities for deep analysis, reasoning, and computation

            ### RAG Mode (Default)

            The RAG mode provides fast, knowledge-based responses using:

            - Semantic and hybrid search capabilities
            - Document-level and chunk-level content retrieval
            - Optional web search integration
            - Source citation and evidence-based responses

            ### Research Mode

            The Research mode builds on RAG capabilities and adds:

            - A dedicated reasoning system for complex problem-solving
            - Critique capabilities to identify potential biases or logical fallacies
            - Python execution for computational analysis
            - Multi-step reasoning for deeper exploration of topics

            ### Available Tools

            **RAG Tools:**

            - `search_file_knowledge`: Semantic/hybrid search on your ingested documents
            - `search_file_descriptions`: Search over file-level metadata
            - `get_file_content`: Fetch entire documents or chunk structures
            - `web_search`: Query external search APIs for up-to-date information
            - `web_scrape`: Scrape and extract content from specific web pages

            **Research Tools:**

            - `rag`: Leverage the underlying RAG agent for information retrieval
            - `reasoning`: Call a dedicated model for complex analytical thinking
            - `critique`: Analyze conversation history to identify flaws and biases
            - `python_executor`: Execute Python code for complex calculations and analysis

            ### Streaming Output

            When streaming is enabled, the agent produces different event types:

            - `thinking`: Shows the model's step-by-step reasoning (when extended_thinking=true)
            - `tool_call`: Shows when the agent invokes a tool
            - `tool_result`: Shows the result of a tool call
            - `citation`: Indicates when a citation is added to the response
            - `message`: Streams partial tokens of the response
            - `final_answer`: Contains the complete generated answer and structured citations

            ### Conversations

            Maintain context across multiple turns by including `conversation_id` in each request.
            After your first call, store the returned `conversation_id` and include it in subsequent calls.
            If no conversation name has already been set for the conversation, the system will automatically assign one.
            """
            # Handle model selection based on mode
            if "model" not in rag_generation_config.model_fields_set:
                if mode == "rag":
                    rag_generation_config.model = self.config.app.quality_llm
                elif mode == "research":
                    rag_generation_config.model = self.config.app.planning_llm

            # Prepare search settings
            effective_settings = self._prepare_search_settings(
                auth_user, search_mode, search_settings
            )

            # Determine the effective generation config (used below to decide
            # whether to stream the response)
            effective_generation_config = rag_generation_config
            if mode == "research" and research_generation_config:
                effective_generation_config = research_generation_config

            try:
                response = await self.services.retrieval.agent(
                    message=message,
                    messages=messages,
                    search_settings=effective_settings,
                    rag_generation_config=rag_generation_config,
                    research_generation_config=research_generation_config,
                    task_prompt=task_prompt,
                    include_title_if_available=include_title_if_available,
                    max_tool_context_length=max_tool_context_length or 32_768,
                    conversation_id=(
                        str(conversation_id) if conversation_id else None  # type: ignore
                    ),
                    use_system_context=(
                        use_system_context
                        if use_system_context is not None
                        else True
                    ),
                    rag_tools=rag_tools,  # type: ignore
                    research_tools=research_tools,  # type: ignore
                    mode=mode,
                    needs_initial_conversation_name=needs_initial_conversation_name,
                )

                if effective_generation_config.stream:

                    async def stream_generator():
                        try:
                            async for chunk in response:
                                # Re-chunk large SSE payloads into 1 KiB slices.
                                if len(chunk) > 1024:
                                    for i in range(0, len(chunk), 1024):
                                        yield chunk[i : i + 1024]
                                else:
                                    yield chunk
                        except GeneratorExit:
                            # Client disconnected; clean up if needed, then return.
                            return

                    return StreamingResponse(  # type: ignore
                        stream_generator(), media_type="text/event-stream"
                    )
                else:
                    return response
            except Exception as e:
                logger.error(f"Error in agent_app: {e}")
                raise R2RException(str(e), 500) from e
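
        # Multi-turn sketch (hedged: assumes the response exposes the
        # conversation id, per the "Conversations" section of the docstring;
        # `first.results.conversation_id` is a hypothetical accessor):
        #
        #   first = client.retrieval.agent(
        #       message={"role": "user", "content": "What is DeepSeek R1?"},
        #   )
        #   follow_up = client.retrieval.agent(
        #       message={"role": "user", "content": "How does it compare?"},
        #       conversation_id=first.results.conversation_id,
        #   )
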
        @self.router.post(
            "/retrieval/completion",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Generate Message Completions",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            response = client.completion(
                                messages=[
                                    {"role": "system", "content": "You are a helpful assistant."},
                                    {"role": "user", "content": "What is the capital of France?"},
                                    {"role": "assistant", "content": "The capital of France is Paris."},
                                    {"role": "user", "content": "What about Italy?"}
                                ],
                                generation_config={
                                    "model": "openai/gpt-4.1-mini",
                                    "temperature": 0.7,
                                    "max_tokens": 150,
                                    "stream": False
                                }
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            async function main() {
                                const response = await client.completion({
                                    messages: [
                                        { role: "system", content: "You are a helpful assistant." },
                                        { role: "user", content: "What is the capital of France?" },
                                        { role: "assistant", content: "The capital of France is Paris." },
                                        { role: "user", content: "What about Italy?" }
                                    ],
                                    generationConfig: {
                                        model: "openai/gpt-4.1-mini",
                                        temperature: 0.7,
                                        maxTokens: 150,
                                        stream: false
                                    }
                                });
                            }

                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "http://localhost:7272/v3/retrieval/completion" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "messages": [
                                        {"role": "system", "content": "You are a helpful assistant."},
                                        {"role": "user", "content": "What is the capital of France?"},
                                        {"role": "assistant", "content": "The capital of France is Paris."},
                                        {"role": "user", "content": "What about Italy?"}
                                    ],
                                    "generation_config": {
                                        "model": "openai/gpt-4.1-mini",
                                        "temperature": 0.7,
                                        "max_tokens": 150,
                                        "stream": false
                                    }
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def completion(
            messages: list[Message] = Body(
                ...,
                description="List of messages to generate completion for",
                example=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant.",
                    },
                    {
                        "role": "user",
                        "content": "What is the capital of France?",
                    },
                    {
                        "role": "assistant",
                        "content": "The capital of France is Paris.",
                    },
                    {"role": "user", "content": "What about Italy?"},
                ],
            ),
            generation_config: GenerationConfig = Body(
                default_factory=GenerationConfig,
                description="Configuration for text generation",
                example={
                    "model": "openai/gpt-4.1-mini",
                    "temperature": 0.7,
                    "max_tokens": 150,
                    "stream": False,
                },
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
            response_model=WrappedCompletionResponse,
        ) -> WrappedLLMChatCompletion:
            """Generate completions for a list of messages.

            This endpoint uses the language model to generate completions for
            the provided messages. The generation process can be customized
            using the `generation_config` parameter.

            The messages list should contain alternating user and assistant
            messages, with an optional system message at the start. Each
            message should have a `role` and `content`.
            """
            return await self.services.retrieval.completion(
                messages=messages,  # type: ignore
                generation_config=generation_config,
            )
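
        # Minimal request sketch (mirrors the x-codeSamples above): an
        # optional system message first, then alternating user/assistant
        # turns, ending on the user turn to be completed:
        #
        #   response = client.completion(
        #       messages=[
        #           {"role": "system", "content": "You are a helpful assistant."},
        #           {"role": "user", "content": "What is the capital of France?"},
        #       ],
        #       generation_config={"model": "openai/gpt-4.1-mini"},
        #   )
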
        @self.router.post(
            "/retrieval/embedding",
            dependencies=[Depends(self.rate_limit_dependency)],
            summary="Generate Embeddings",
            openapi_extra={
                "x-codeSamples": [
                    {
                        "lang": "Python",
                        "source": textwrap.dedent(
                            """
                            from r2r import R2RClient

                            client = R2RClient()
                            # when using auth, do client.login(...)

                            result = client.retrieval.embedding(
                                text="What is DeepSeek R1?",
                            )
                            """
                        ),
                    },
                    {
                        "lang": "JavaScript",
                        "source": textwrap.dedent(
                            """
                            const { r2rClient } = require("r2r-js");

                            const client = new r2rClient();
                            // when using auth, do client.login(...)

                            async function main() {
                                const response = await client.retrieval.embedding({
                                    text: "What is DeepSeek R1?",
                                });
                            }

                            main();
                            """
                        ),
                    },
                    {
                        "lang": "Shell",
                        "source": textwrap.dedent(
                            """
                            curl -X POST "http://localhost:7272/v3/retrieval/embedding" \\
                                -H "Content-Type: application/json" \\
                                -H "Authorization: Bearer YOUR_API_KEY" \\
                                -d '{
                                    "text": "What is DeepSeek R1?"
                                }'
                            """
                        ),
                    },
                ]
            },
        )
        @self.base_endpoint
        async def embedding(
            text: str = Body(
                ...,
                description="Text to generate embeddings for",
            ),
            auth_user=Depends(self.providers.auth.auth_wrapper()),
        ) -> WrappedEmbeddingResponse:
            """Generate an embedding for the provided text.

            This endpoint uses the configured embedding provider to generate
            an embedding vector for the input text.
            """
            return await self.services.retrieval.embedding(
                text=text,
            )