# r2r.toml.back (3.8 KB)
  1. [app]
  2. # app settings are global available like `r2r_config.agent.app`
  3. # project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
  4. [agent]
  5. system_instruction_name = "rag_agent"
  6. # tool_names = ["local_search", "web_search"] # uncomment to enable web search
  7. tool_names = ["local_search"]
  8. [agent.generation_config]
  9. model = "openai/gpt-4o"
  10. [auth]
  11. provider = "r2r"
  12. access_token_lifetime_in_minutes = 60
  13. refresh_token_lifetime_in_days = 7
  14. require_authentication = false
  15. require_email_verification = false
  16. default_admin_email = "admin@example.com"
  17. default_admin_password = "change_me_immediately"
  18. [completion]
  19. provider = "litellm"
  20. concurrent_request_limit = 64
  21. [completion.generation_config]
  22. model = "openai/gpt-4o"
  23. temperature = 0.1
  24. top_p = 1
  25. max_tokens_to_sample = 1_024
  26. stream = false
  27. add_generation_kwargs = { }
  28. [crypto]
  29. provider = "bcrypt"
  30. [database]
  31. provider = "postgres"
  32. default_collection_name = "Default"
  33. default_collection_description = "Your default collection."
  34. enable_fts = true # whether or not to enable full-text search, e.g `hybrid search`
  35. # KG settings
  36. batch_size = 256
  37. [database.graph_creation_settings]
  38. graph_entity_description_prompt = "graphrag_entity_description"
  39. entity_types = [] # if empty, all entities are extracted
  40. relation_types = [] # if empty, all relations are extracted
  41. fragment_merge_count = 1 # number of fragments to merge into a single extraction
  42. max_knowledge_relationships = 100
  43. max_description_input_length = 65536
  44. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for relationshipt extraction
  45. [database.graph_entity_deduplication_settings]
  46. graph_entity_deduplication_type = "by_name"
  47. graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
  48. max_description_input_length = 65536
  49. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication
  50. [database.graph_enrichment_settings]
  51. community_reports_prompt = "graphrag_community_reports"
  52. max_summary_input_length = 65536
  53. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering
  54. leiden_params = {}
  55. [database.graph_search_settings]
  56. generation_config = { model = "openai/gpt-4o-mini" }
  57. [embedding]
  58. provider = "litellm"
  59. # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
  60. # RECOMMENDED - For advanced applications,
  61. # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
  62. base_model = "openai/text-embedding-3-small"
  63. base_dimension = 512
  64. # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
  65. batch_size = 128
  66. add_title_as_prefix = false
  67. concurrent_request_limit = 256
  68. quantization_settings = { quantization_type = "FP32" }
  69. [file]
  70. provider = "postgres"
  71. [ingestion]
  72. provider = "r2r"
  73. chunking_strategy = "recursive"
  74. chunk_size = 1_024
  75. chunk_overlap = 512
  76. excluded_parsers = ["mp4"]
  77. # Ingestion-time document summary parameters
  78. # skip_document_summary = False
  79. # document_summary_system_prompt = 'default_system'
  80. # document_summary_task_prompt = 'default_summary'
  81. # chunks_for_document_summary = 128
  82. document_summary_model = "openai/gpt-4o-mini"
  83. vision_img_model = "openai/gpt-4o"
  84. vision_pdf_model = "openai/gpt-4o"
  85. [ingestion.chunk_enrichment_settings]
  86. enable_chunk_enrichment = false # disabled by default
  87. strategies = ["semantic", "neighborhood"]
  88. forward_chunks = 3
  89. backward_chunks = 3
  90. semantic_neighbors = 10
  91. semantic_similarity_threshold = 0.7
  92. generation_config = { model = "openai/gpt-4o-mini" }
  93. [ingestion.extra_parsers]
  94. pdf = "zerox"
  95. [logging]
  96. provider = "r2r"
  97. log_table = "logs"
  98. log_info_table = "log_info"
  99. [orchestration]
  100. provider = "simple"
  101. [prompt]
  102. provider = "r2r"
  103. [email]
  104. provider = "console_mock"