# r2r.old.toml — R2R service configuration
  1. [app]
  2. # app settings are global available like `r2r_config.agent.app`
  3. # project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
  4. default_max_documents_per_user = 10_000
  5. default_max_chunks_per_user = 10_000_000
  6. default_max_collections_per_user = 5_000
  7. # Set the default max upload size to 2 GB for local testing
  8. default_max_upload_size = 2147483648 # 2 GB for anything not explicitly listed
  9. [app.max_upload_size_by_type]
  10. # Common text-based formats
  11. txt = 2147483648 # 2 GB
  12. md = 2147483648
  13. tsv = 2147483648
  14. csv = 2147483648
  15. xml = 2147483648
  16. html = 2147483648
  17. # Office docs
  18. doc = 2147483648
  19. docx = 2147483648
  20. ppt = 2147483648
  21. pptx = 2147483648
  22. xls = 2147483648
  23. xlsx = 2147483648
  24. odt = 2147483648
  25. # PDFs
  26. pdf = 2147483648
  27. # E-mail
  28. eml = 2147483648
  29. msg = 2147483648
  30. p7s = 2147483648
  31. # Images
  32. bmp = 2147483648
  33. heic = 2147483648
  34. jpeg = 2147483648
  35. jpg = 2147483648
  36. png = 2147483648
  37. tiff = 2147483648
  38. # E-books and other formats
  39. epub = 2147483648
  40. rtf = 2147483648
  41. rst = 2147483648
  42. org = 2147483648
  43. [agent]
  44. system_instruction_name = "rag_agent"
  45. # tool_names = ["local_search", "web_search"] # uncomment to enable web search
  46. tool_names = ["local_search"]
  47. [agent.generation_config]
  48. model = "openai/gpt-4o"
  49. [auth]
  50. provider = "r2r"
  51. access_token_lifetime_in_minutes = 60000 # set a very high default value, for easier testing
  52. refresh_token_lifetime_in_days = 7
  53. require_authentication = false
  54. require_email_verification = false
  55. default_admin_email = "xujiawei@cocorobo.cc"
  56. default_admin_password = "usestudio-1"
  57. [completion]
  58. provider = "r2r"
  59. concurrent_request_limit = 256
  60. fast_llm = "openai/gpt-4o-mini"
  61. [completion.generation_config]
  62. model = "openai/gpt-4o"
  63. temperature = 0.1
  64. top_p = 1
  65. max_tokens_to_sample = 1_024
  66. stream = false
  67. add_generation_kwargs = { }
  68. [crypto]
  69. provider = "bcrypt"
  70. [database]
  71. provider = "postgres"
  72. default_collection_name = "Default"
  73. default_collection_description = "Your default collection."
  74. # collection_summary_system_prompt = 'default_system'
  75. # collection_summary_task_prompt = 'default_collection_summary'
  76. # KG settings
  77. batch_size = 256
  78. [database.graph_creation_settings]
  79. clustering_mode = "local"
  80. graph_entity_description_prompt = "graphrag_entity_description"
  81. entity_types = [] # if empty, all entities are extracted
  82. relation_types = [] # if empty, all relations are extracted
  83. fragment_merge_count = 1 # number of fragments to merge into a single extraction
  84. max_knowledge_relationships = 100
  85. max_description_input_length = 65536
  86. generation_config = { model = "openai/gpt-4o-mini", max_tokens_to_sample = 4_096 } # and other params, model used for relationshipt extraction
  87. automatic_deduplication = true # enable automatic deduplication of entities
  88. [database.graph_enrichment_settings]
  89. community_reports_prompt = "graphrag_community_reports"
  90. max_summary_input_length = 65536
  91. generation_config = { model = "openai/gpt-4o-mini", max_tokens_to_sample = 4_096 } # and other params, model used for node description and graph clustering
  92. leiden_params = {}
  93. [database.graph_search_settings]
  94. generation_config = { model = "openai/gpt-4o-mini" }
  95. [database.limits]
  96. # Default fallback limits if no route or user-level overrides are found
  97. global_per_min = 300
  98. monthly_limit = 10000
  99. [database.route_limits]
  100. # Set the `v3/retrieval/search` route to have a maximum of 5 requests per minute
  101. "/v3/retrieval/search" = { route_per_min = 120 }
  102. "/v3/retrieval/rag" = { route_per_min = 30 }
  103. [embedding]
  104. provider = "litellm"
  105. # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
  106. # RECOMMENDED - For advanced applications,
  107. # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
  108. #base_model = "openai/text-embedding-3-small"
  109. #base_dimension = 512
  110. base_model = "openai/text-embedding-3-large"
  111. base_dimension = 256
  112. # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
  113. batch_size = 128
  114. add_title_as_prefix = false
  115. concurrent_request_limit = 256
  116. quantization_settings = { quantization_type = "FP32" }
  117. [completion_embedding]
  118. # Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
  119. provider = "litellm"
  120. base_model = "openai/text-embedding-3-large"
  121. base_dimension = 256
  122. batch_size = 128
  123. add_title_as_prefix = false
  124. concurrent_request_limit = 256
  125. [file]
  126. provider = "postgres"
  127. [ingestion]
  128. provider = "r2r"
  129. chunking_strategy = "recursive"
  130. chunk_size = 800
  131. chunk_overlap = 400
  132. excluded_parsers = ["mp4"]
  133. # Ingestion-time document summary parameters
  134. # skip_document_summary = False
  135. # document_summary_system_prompt = 'default_system'
  136. # document_summary_task_prompt = 'default_summary'
  137. # chunks_for_document_summary = 128
  138. document_summary_model = "openai/gpt-4o-mini"
  139. vision_img_model = "openai/gpt-4o"
  140. vision_pdf_model = "openai/gpt-4o"
  141. automatic_extraction = true # enable automatic extraction of entities and relations
  142. [ingestion.chunk_enrichment_settings]
  143. enable_chunk_enrichment = false # disabled by default
  144. n_chunks = 2 # the number of chunks (both preceeding and succeeding) to use in enrichment
  145. generation_config = { model = "openai/gpt-4o-mini" }
  146. [ingestion.extra_parsers]
  147. pdf = "zerox"
  148. [logging]
  149. provider = "r2r"
  150. log_table = "logs"
  151. log_info_table = "log_info"
  152. [orchestration]
  153. provider = "simple"
  154. [prompt]
  155. provider = "r2r"
  156. [email]
  157. provider = "console_mock"