r2r.toml 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  [app]
  # App settings are globally available, e.g. as `r2r_config.agent.app`.
  # project_name = "r2r_default" # optional, can also be set with the `R2R_PROJECT_NAME` env var
  default_max_documents_per_user = 100
  default_max_chunks_per_user = 100_000
  default_max_collections_per_user = 10
  [agent]
  system_instruction_name = "rag_agent"
  # To enable web search, REPLACE the active `tool_names` line with the commented one
  # below; keeping both would define the key twice, which is invalid TOML.
  # tool_names = ["local_search", "web_search"]
  tool_names = ["local_search"]

  [agent.generation_config]
  model = "openai/gpt-4o"
  [auth]
  provider = "r2r"
  access_token_lifetime_in_minutes = 60
  refresh_token_lifetime_in_days = 7
  # NOTE(review): authentication and email verification are both switched off here,
  # and the admin credentials below are placeholder values - confirm these are
  # overridden before any non-local deployment.
  require_authentication = false
  require_email_verification = false
  default_admin_email = "admin@example.com"
  default_admin_password = "change_me_immediately"
  [completion]
  provider = "litellm"
  concurrent_request_limit = 64

  # Generation parameters for the completion provider.
  [completion.generation_config]
  model = "openai/gpt-4o"
  temperature = 0.1
  top_p = 1
  max_tokens_to_sample = 1_024
  stream = false
  add_generation_kwargs = {}
  [crypto]
  provider = "bcrypt"
  [database]
  provider = "postgres"
  default_collection_name = "Default"
  default_collection_description = "Your default collection."
  # collection_summary_system_prompt = 'default_system'
  # collection_summary_task_prompt = 'default_collection_summary'
  # KG settings (presumably "knowledge graph" - see the `database.graph_*` tables)
  batch_size = 256
  [database.graph_creation_settings]
  clustering_mode = "local"
  graph_entity_description_prompt = "graphrag_entity_description"
  entity_types = [] # if empty, all entities are extracted
  relation_types = [] # if empty, all relations are extracted
  fragment_merge_count = 1 # number of fragments to merge into a single extraction
  max_knowledge_relationships = 100
  max_description_input_length = 65_536
  # Model used for relationship extraction; other generation params may be added here.
  generation_config = { model = "openai/gpt-4o-mini" }
  [database.graph_entity_deduplication_settings]
  graph_entity_deduplication_type = "by_name"
  graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
  max_description_input_length = 65_536
  # Model used for deduplication; other generation params may be added here.
  generation_config = { model = "openai/gpt-4o-mini" }
  [database.graph_enrichment_settings]
  community_reports_prompt = "graphrag_community_reports"
  max_summary_input_length = 65_536
  # Model used for node description and graph clustering; other generation
  # params may be added here.
  generation_config = { model = "openai/gpt-4o-mini" }
  leiden_params = {}

  [database.graph_search_settings]
  generation_config = { model = "openai/gpt-4o-mini" }
  [database.limits]
  # Default fallback limits if no route or user-level overrides are found
  global_per_min = 300
  monthly_limit = 10_000

  [database.route_limits]
  # Per-route overrides: `/v3/retrieval/search` allows at most 120 requests per
  # minute and `/v3/retrieval/rag` at most 30.
  "/v3/retrieval/search" = { route_per_min = 120 }
  "/v3/retrieval/rag" = { route_per_min = 30 }
  [embedding]
  provider = "litellm"
  # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`.
  # RECOMMENDED - for advanced applications, use `openai/text-embedding-3-large`
  # with `base_dimension = 3072` and binary quantization.
  # NOTE(review): the active values below (`text-embedding-3-large` at 256 dimensions
  # with FP32 quantization) match neither preset described above - confirm intended.
  base_model = "openai/text-embedding-3-large"
  base_dimension = 256
  # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
  batch_size = 128
  add_title_as_prefix = false
  concurrent_request_limit = 256
  quantization_settings = { quantization_type = "FP32" }
  [file]
  provider = "postgres"
  [ingestion]
  provider = "r2r"
  chunking_strategy = "recursive"
  chunk_size = 800
  chunk_overlap = 400
  excluded_parsers = ["mp4"]
  # Ingestion-time document summary parameters. The commented-out keys are left
  # disabled here; note that TOML booleans are lowercase (`false`, not `False`).
  # skip_document_summary = false
  # document_summary_system_prompt = 'default_system'
  # document_summary_task_prompt = 'default_summary'
  # chunks_for_document_summary = 128
  document_summary_model = "openai/gpt-4o-mini"
  vision_img_model = "openai/gpt-4o"
  vision_pdf_model = "openai/gpt-4o"
  [ingestion.chunk_enrichment_settings]
  enable_chunk_enrichment = false # disabled by default
  strategies = ["semantic", "neighborhood"]
  forward_chunks = 3
  backward_chunks = 3
  semantic_neighbors = 10
  semantic_similarity_threshold = 0.7
  generation_config = { model = "openai/gpt-4o-mini" }

  # Maps a file type to the named parser used for it.
  [ingestion.extra_parsers]
  pdf = "zerox"
  [logging]
  provider = "r2r"
  log_table = "logs"
  log_info_table = "log_info"
  [orchestration]
  provider = "simple"

  [prompt]
  provider = "r2r"

  [email]
  provider = "console_mock"