# Server configuration: application limits, agent, auth, completion,
# database / knowledge-graph settings, embeddings, ingestion and providers.

[app]
# App settings are globally available, e.g. `r2r_config.agent.app`.
# project_name = "r2r_default" # optional, can also be set with the `R2R_PROJECT_NAME` env var
default_max_documents_per_user = 100
default_max_chunks_per_user = 100_000
default_max_collections_per_user = 10

[agent]
system_instruction_name = "rag_agent"
# tool_names = ["local_search", "web_search"] # uncomment to enable web search
tool_names = ["local_search"]

[agent.generation_config]
model = "openai/gpt-4o"

[auth]
provider = "r2r"
access_token_lifetime_in_minutes = 60
refresh_token_lifetime_in_days = 7
require_authentication = false
require_email_verification = false
# NOTE(review): these look like real admin credentials committed in plain text,
# and `require_authentication` is false above. Consider rotating the password
# and supplying both values from a secrets store instead of this file —
# confirm with the deployment owner before changing them.
default_admin_email = "xujiawei@cocorobo.cc"
default_admin_password = "usestudio-1"
# Placeholder values, kept for reference:
# default_admin_email = "admin@example.com"
# default_admin_password = "change_me_immediately"

[completion]
provider = "litellm"
concurrent_request_limit = 64

[completion.generation_config]
model = "openai/gpt-4o"
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
add_generation_kwargs = {}

[crypto]
provider = "bcrypt"

[database]
provider = "postgres"
default_collection_name = "Default"
default_collection_description = "Your default collection."
# collection_summary_system_prompt = 'default_system'
# collection_summary_task_prompt = 'default_collection_summary'

# KG settings
batch_size = 256

[database.graph_creation_settings]
clustering_mode = "local"
graph_entity_description_prompt = "graphrag_entity_description"
entity_types = [] # if empty, all entities are extracted
relation_types = [] # if empty, all relations are extracted
fragment_merge_count = 1 # number of fragments to merge into a single extraction
max_knowledge_relationships = 100
max_description_input_length = 65536
# Model (and other generation params) used for relationship extraction.
generation_config = { model = "openai/gpt-4o-mini" }

[database.graph_entity_deduplication_settings]
graph_entity_deduplication_type = "by_name"
graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
max_description_input_length = 65536
# Model (and other generation params) used for deduplication.
generation_config = { model = "openai/gpt-4o-mini" }

[database.graph_enrichment_settings]
community_reports_prompt = "graphrag_community_reports"
max_summary_input_length = 65536
# Model (and other generation params) used for node description and graph clustering.
generation_config = { model = "openai/gpt-4o-mini" }
leiden_params = {}

[database.graph_search_settings]
generation_config = { model = "openai/gpt-4o-mini" }

[database.limits]
# Default fallback limits if no route or user-level overrides are found.
global_per_min = 300
monthly_limit = 10000

[database.route_limits]
# Per-route overrides: `/v3/retrieval/search` allows at most 120 requests per
# minute and `/v3/retrieval/rag` at most 30.
"/v3/retrieval/search" = { route_per_min = 120 }
"/v3/retrieval/rag" = { route_per_min = 30 }

[embedding]
provider = "litellm"
# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`.
# RECOMMENDED - For advanced applications,
# use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization.
# NOTE(review): the values below pair the large model with 256 dimensions and
# FP32 quantization, which matches neither recommendation above — confirm this
# is intentional.
base_model = "openai/text-embedding-3-large"
base_dimension = 256
# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
batch_size = 128
add_title_as_prefix = false
concurrent_request_limit = 256
quantization_settings = { quantization_type = "FP32" }

[file]
provider = "postgres"

[ingestion]
provider = "r2r"
chunking_strategy = "recursive"
chunk_size = 800
chunk_overlap = 400
excluded_parsers = ["mp4"]

# Ingestion-time document summary parameters
# skip_document_summary = false
# document_summary_system_prompt = 'default_system'
# document_summary_task_prompt = 'default_summary'
# chunks_for_document_summary = 128
document_summary_model = "openai/gpt-4o-mini"
vision_img_model = "openai/gpt-4o"
vision_pdf_model = "openai/gpt-4o"

[ingestion.chunk_enrichment_settings]
enable_chunk_enrichment = false # disabled by default
strategies = ["semantic", "neighborhood"]
forward_chunks = 3
backward_chunks = 3
semantic_neighbors = 10
semantic_similarity_threshold = 0.7
generation_config = { model = "openai/gpt-4o-mini" }

[ingestion.extra_parsers]
pdf = "zerox"

[logging]
provider = "r2r"
log_table = "logs"
log_info_table = "log_info"

[orchestration]
provider = "simple"

[prompt]
provider = "r2r"

[email]
provider = "console_mock"