123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- [app]
- # App settings are globally available, e.g. as `r2r_config.agent.app`
- # project_name = "r2r_default" # optional, can also be set with the `R2R_PROJECT_NAME` env var
- default_max_documents_per_user = 100
- default_max_chunks_per_user = 100_000
- default_max_collections_per_user = 10
- [agent]
- system_instruction_name = "rag_agent"
- # tool_names = ["local_search", "web_search"] # to enable web search, uncomment this line and remove the `tool_names` line below (TOML forbids defining a key twice)
- tool_names = ["local_search"]
- [agent.generation_config]
- model = "openai/gpt-4o" # same model as [completion.generation_config]
- [auth]
- provider = "r2r"
- access_token_lifetime_in_minutes = 60
- refresh_token_lifetime_in_days = 7
- require_authentication = false # NOTE(review): authentication is not required with this default — confirm before exposing the service publicly
- require_email_verification = false
- default_admin_email = "admin@example.com"
- default_admin_password = "change_me_immediately" # placeholder value; replace it before deploying
- [completion]
- provider = "litellm"
- concurrent_request_limit = 64 # set independently of [embedding].concurrent_request_limit
- [completion.generation_config]
- model = "openai/gpt-4o"
- temperature = 0.1
- top_p = 1
- max_tokens_to_sample = 1_024
- stream = false
- add_generation_kwargs = { } # empty: no extra generation kwargs are set here
- [crypto]
- provider = "bcrypt" # NOTE(review): presumably the password-hashing backend — confirm
- [database]
- provider = "postgres"
- default_collection_name = "Default"
- default_collection_description = "Your default collection."
- # collection_summary_system_prompt = 'default_system'
- # collection_summary_task_prompt = 'default_collection_summary'
- # KG (knowledge graph) settings
- batch_size = 256
- [database.graph_creation_settings]
- clustering_mode = "local"
- graph_entity_description_prompt = "graphrag_entity_description"
- entity_types = [] # if empty, all entities are extracted
- relation_types = [] # if empty, all relations are extracted
- fragment_merge_count = 1 # number of fragments to merge into a single extraction
- max_knowledge_relationships = 100
- max_description_input_length = 65536
- generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for relationship extraction
- [database.graph_entity_deduplication_settings]
- graph_entity_deduplication_type = "by_name"
- graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
- max_description_input_length = 65536
- generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication
- [database.graph_enrichment_settings]
- community_reports_prompt = "graphrag_community_reports"
- max_summary_input_length = 65536
- generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering
- leiden_params = {}
- [database.graph_search_settings]
- generation_config = { model = "openai/gpt-4o-mini" }
- [database.limits]
- # Default fallback limits if no route or user-level overrides are found
- global_per_min = 300
- monthly_limit = 10000
- [database.route_limits]
- # Per-route overrides: limit `/v3/retrieval/search` to a maximum of 120 requests per minute and `/v3/retrieval/rag` to 30
- "/v3/retrieval/search" = { route_per_min = 120 }
- "/v3/retrieval/rag" = { route_per_min = 30 }
- [embedding]
- provider = "litellm"
- # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
- # RECOMMENDED - For advanced applications,
- # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
- base_model = "openai/text-embedding-3-large"
- base_dimension = 3072
- # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
- batch_size = 128
- add_title_as_prefix = false
- concurrent_request_limit = 256
- quantization_settings = { quantization_type = "FP32" } # NOTE(review): the recommendation above mentions binary quantization, but FP32 is configured — confirm which is intended
- [file]
- provider = "postgres" # same provider value as [database]
- [ingestion]
- provider = "r2r"
- chunking_strategy = "recursive"
- chunk_size = 800
- chunk_overlap = 400 # NOTE(review): half of chunk_size — confirm this much overlap is intended
- excluded_parsers = ["mp4"]
- # Ingestion-time document summary parameters
- # skip_document_summary = false
- # document_summary_system_prompt = 'default_system'
- # document_summary_task_prompt = 'default_summary'
- # chunks_for_document_summary = 128
- document_summary_model = "openai/gpt-4o-mini"
- vision_img_model = "openai/gpt-4o"
- vision_pdf_model = "openai/gpt-4o"
- [ingestion.chunk_enrichment_settings]
- enable_chunk_enrichment = false # disabled by default
- strategies = ["semantic", "neighborhood"]
- forward_chunks = 3
- backward_chunks = 3
- semantic_neighbors = 10
- semantic_similarity_threshold = 0.7
- generation_config = { model = "openai/gpt-4o-mini" }
- [ingestion.extra_parsers]
- pdf = "zerox"
- [logging]
- provider = "r2r"
- log_table = "logs" # NOTE(review): presumably a table name in the configured store — confirm
- log_info_table = "log_info"
- [orchestration]
- provider = "simple" # NOTE(review): alternative providers are not listed in this file — check the project docs
- [prompt]
- provider = "r2r" # same provider value as [auth], [ingestion] and [logging]
- [email]
- provider = "console_mock" # NOTE(review): name suggests emails are not actually sent — confirm before enabling require_email_verification
|