# r2r.toml
  1. [app]
  2. # app settings are global available like `r2r_config.agent.app`
  3. # project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
  4. default_max_documents_per_user = 100
  5. default_max_chunks_per_user = 100_000
  6. default_max_collections_per_user = 10
  7. [agent]
  8. system_instruction_name = "rag_agent"
  9. # tool_names = ["local_search", "web_search"] # uncomment to enable web search
  10. tool_names = ["local_search"]
  11. [agent.generation_config]
  12. model = "openai/gpt-4o"
  13. [auth]
  14. provider = "r2r"
  15. access_token_lifetime_in_minutes = 60
  16. refresh_token_lifetime_in_days = 7
  17. require_authentication = false
  18. require_email_verification = false
  19. default_admin_email = "xujiawei@cocorobo.cc"
  20. default_admin_password = "usestudio-1"
  21. #default_admin_email = "admin@example.com"
  22. #default_admin_password = "change_me_immediately"
  23. [completion]
  24. provider = "litellm"
  25. concurrent_request_limit = 64
  26. [completion.generation_config]
  27. model = "openai/gpt-4o"
  28. temperature = 0.1
  29. top_p = 1
  30. max_tokens_to_sample = 1_024
  31. stream = false
  32. add_generation_kwargs = { }
  33. [crypto]
  34. provider = "bcrypt"
  35. [database]
  36. provider = "postgres"
  37. default_collection_name = "Default"
  38. default_collection_description = "Your default collection."
  39. # collection_summary_system_prompt = 'default_system'
  40. # collection_summary_task_prompt = 'default_collection_summary'
  41. # KG settings
  42. batch_size = 256
  43. [database.graph_creation_settings]
  44. clustering_mode = "local"
  45. graph_entity_description_prompt = "graphrag_entity_description"
  46. entity_types = [] # if empty, all entities are extracted
  47. relation_types = [] # if empty, all relations are extracted
  48. fragment_merge_count = 1 # number of fragments to merge into a single extraction
  49. max_knowledge_relationships = 100
  50. max_description_input_length = 65536
  51. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for relationshipt extraction
  52. [database.graph_entity_deduplication_settings]
  53. graph_entity_deduplication_type = "by_name"
  54. graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
  55. max_description_input_length = 65536
  56. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication
  57. [database.graph_enrichment_settings]
  58. community_reports_prompt = "graphrag_community_reports"
  59. max_summary_input_length = 65536
  60. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering
  61. leiden_params = {}
  62. [database.graph_search_settings]
  63. generation_config = { model = "openai/gpt-4o-mini" }
  64. [database.limits]
  65. # Default fallback limits if no route or user-level overrides are found
  66. global_per_min = 300
  67. monthly_limit = 10000
  68. [database.route_limits]
  69. # Set the `v3/retrieval/search` route to have a maximum of 5 requests per minute
  70. "/v3/retrieval/search" = { route_per_min = 120 }
  71. "/v3/retrieval/rag" = { route_per_min = 30 }
  72. [embedding]
  73. provider = "litellm"
  74. # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
  75. # RECOMMENDED - For advanced applications,
  76. # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
  77. base_model = "openai/text-embedding-3-large"
  78. base_dimension = 256
  79. # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
  80. batch_size = 128
  81. add_title_as_prefix = false
  82. concurrent_request_limit = 256
  83. quantization_settings = { quantization_type = "FP32" }
  84. [file]
  85. provider = "postgres"
  86. [ingestion]
  87. provider = "r2r"
  88. chunking_strategy = "recursive"
  89. chunk_size = 800
  90. chunk_overlap = 400
  91. excluded_parsers = ["mp4"]
  92. # Ingestion-time document summary parameters
  93. # skip_document_summary = False
  94. # document_summary_system_prompt = 'default_system'
  95. # document_summary_task_prompt = 'default_summary'
  96. # chunks_for_document_summary = 128
  97. document_summary_model = "openai/gpt-4o-mini"
  98. vision_img_model = "openai/gpt-4o"
  99. vision_pdf_model = "openai/gpt-4o"
  100. [ingestion.chunk_enrichment_settings]
  101. enable_chunk_enrichment = false # disabled by default
  102. strategies = ["semantic", "neighborhood"]
  103. forward_chunks = 3
  104. backward_chunks = 3
  105. semantic_neighbors = 10
  106. semantic_similarity_threshold = 0.7
  107. generation_config = { model = "openai/gpt-4o-mini" }
  108. [ingestion.extra_parsers]
  109. pdf = "zerox"
  110. [logging]
  111. provider = "r2r"
  112. log_table = "logs"
  113. log_info_table = "log_info"
  114. [orchestration]
  115. provider = "simple"
  116. [prompt]
  117. provider = "r2r"
  118. [email]
  119. provider = "console_mock"