# r2r.toml
  1. [app] # no active keys: the only setting below is commented out
  2. # app settings are globally available, e.g. as `r2r_config.agent.app`
  3. # project_name = "r2r_default" # optional; can also be set with the `R2R_PROJECT_NAME` env var
  4. [agent]
  5. system_instruction_name = "rag_agent"
  6. # tool_names = ["local_search", "web_search"] # to enable web search, use this line INSTEAD of the active one below (TOML forbids defining a key twice)
  7. tool_names = ["local_search"] # only local search is enabled
  8. [agent.generation_config]
  9. model = "openai/gpt-4o" # same model string as [completion.generation_config]
  10. [auth]
  11. provider = "r2r"
  12. access_token_lifetime_in_minutes = 60
  13. refresh_token_lifetime_in_days = 7
  14. require_authentication = true
  15. require_email_verification = true # NOTE(review): [email] provider is "console_mock" — confirm verification mail can actually reach users
  16. default_admin_email = "xujiawei@cocorobo.cc" # NOTE(review): personal address stored in a config file — confirm this is intended
  17. default_admin_password = "usestudio-1" # NOTE(review): plaintext credential in a config file — rotate it and supply it out-of-band if the consumer supports that
  18. [completion]
  19. provider = "litellm"
  20. concurrent_request_limit = 64
  21. [completion.generation_config]
  22. # Default generation parameters for the completion provider configured above.
  23. model = "openai/gpt-4o"
  24. temperature = 0.1
  25. top_p = 1
  26. max_tokens_to_sample = 1_024
  27. stream = false
  28. add_generation_kwargs = { } # empty inline table: no extra generation kwargs are set
  28. [crypto]
  29. provider = "bcrypt" # presumably the hashing backend used for stored passwords — TODO confirm against the consumer
  30. [database]
  31. provider = "postgres"
  32. default_collection_name = "Default"
  33. default_collection_description = "cocorobo collection."
  34. enable_fts = true # whether to enable full-text search, e.g. for `hybrid search`
  35. # Knowledge-graph (KG) settings
  36. batch_size = 256
  37. [database.graph_creation_settings]
  38. clustering_mode = "local"
  39. graph_entity_description_prompt = "graphrag_entity_description"
  40. entity_types = [] # if empty, all entities are extracted
  41. relation_types = [] # if empty, all relations are extracted
  42. fragment_merge_count = 1 # number of fragments to merge into a single extraction
  43. max_knowledge_relationships = 100
  44. max_description_input_length = 65536
  45. generation_config = { model = "openai/gpt-4o-mini" } # model (plus any other generation params) used for relationship extraction
  46. [database.graph_entity_deduplication_settings]
  47. graph_entity_deduplication_type = "by_name"
  48. graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
  49. max_description_input_length = 65536 # same value as in [database.graph_creation_settings]
  50. generation_config = { model = "openai/gpt-4o-mini" } # model (plus any other generation params) used for deduplication
  51. [database.graph_enrichment_settings]
  52. community_reports_prompt = "graphrag_community_reports"
  53. max_summary_input_length = 65536
  54. generation_config = { model = "openai/gpt-4o-mini" } # model (plus any other generation params) used for node description and graph clustering
  55. leiden_params = {} # empty inline table: no Leiden parameters are overridden here
  56. [database.graph_search_settings]
  57. generation_config = { model = "openai/gpt-4o-mini" } # same model as the other [database.graph_*] tables
  58. [database.limits]
  59. # Default fallback limits, used when no route-level or user-level override is found
  60. global_per_min = 300 # per-minute limit across all routes
  61. monthly_limit = 10000 # presumably counted in requests per month — TODO confirm the unit against the consumer
  62. [database.route_limits]
  63. # Per-route overrides: `/v3/retrieval/search` allows at most 120 requests per minute, `/v3/retrieval/rag` at most 30
  64. "/v3/retrieval/search" = { route_per_min = 120 }
  65. "/v3/retrieval/rag" = { route_per_min = 30 }
  66. [embedding]
  67. provider = "litellm"
  68. # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
  69. # RECOMMENDED - For advanced applications,
  70. # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
  71. base_model = "openai/text-embedding-3-large"
  72. base_dimension = 3072
  73. # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
  74. batch_size = 128
  75. add_title_as_prefix = false
  76. concurrent_request_limit = 25600 # NOTE(review): 400x the [completion] limit of 64 — confirm this is intentional and not a typo
  77. quantization_settings = { quantization_type = "FP32" } # NOTE(review): the recommendation above mentions binary quantization, but FP32 is configured — confirm which is intended
  78. [file]
  79. provider = "postgres" # same provider value as [database]
  80. [ingestion]
  81. provider = "r2r"
  82. chunking_strategy = "recursive"
  83. chunk_size = 800
  84. chunk_overlap = 400 # half of chunk_size; NOTE(review): confirm a 50% overlap is intended
  85. excluded_parsers = ["mp4"]
  86. # Ingestion-time document summary parameters (commented-out keys are not set in this file)
  87. # skip_document_summary = false
  88. # document_summary_system_prompt = 'default_system'
  89. # document_summary_task_prompt = 'default_summary'
  90. # chunks_for_document_summary = 128
  91. document_summary_model = "openai/gpt-4o-mini"
  92. vision_img_model = "openai/gpt-4o"
  93. vision_pdf_model = "openai/gpt-4o"
  94. [ingestion.chunk_enrichment_settings]
  95. enable_chunk_enrichment = false # disabled by default; presumably the remaining keys only take effect once this is true — TODO confirm
  96. strategies = ["semantic", "neighborhood"]
  97. forward_chunks = 3
  98. backward_chunks = 3
  99. semantic_neighbors = 10
  100. semantic_similarity_threshold = 0.7
  101. generation_config = { model = "openai/gpt-4o-mini" }
  102. [ingestion.extra_parsers]
  103. pdf = "zerox" # selects the "zerox" parser for PDF files
  104. [logging]
  105. provider = "r2r"
  106. log_table = "logs" # table name for log entries
  107. log_info_table = "log_info" # table name for log info records
  108. [orchestration]
  109. provider = "simple"
  110. [prompt]
  111. provider = "r2r"
  112. [email]
  113. provider = "console_mock" # NOTE(review): name suggests a mock that does not send real mail, while [auth] sets require_email_verification = true — confirm