# r2r.toml.back (3.8 KB)
  1. [app]
  2. # app settings are global available like `r2r_config.agent.app`
  3. # project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
  4. [agent]
  5. system_instruction_name = "rag_agent"
  6. # tool_names = ["local_search", "web_search"] # uncomment to enable web search
  7. tool_names = ["local_search"]
  8. [agent.generation_config]
  9. model = "openai/gpt-4o"
  10. [auth]
  11. provider = "r2r"
  12. access_token_lifetime_in_minutes = 60
  13. refresh_token_lifetime_in_days = 7
  14. require_authentication = false
  15. require_email_verification = false
  16. default_admin_email = "admin@example.com"
  17. default_admin_password = "change_me_immediately"
  18. [completion]
  19. provider = "litellm"
  20. concurrent_request_limit = 64
  21. [completion.generation_config]
  22. model = "openai/gpt-4o"
  23. temperature = 0.1
  24. top_p = 1
  25. max_tokens_to_sample = 1_024
  26. stream = false
  27. add_generation_kwargs = { }
  28. [crypto]
  29. provider = "bcrypt"
  30. [database]
  31. provider = "postgres"
  32. default_collection_name = "Default"
  33. default_collection_description = "Your default collection."
  34. enable_fts = true # whether or not to enable full-text search, e.g `hybrid search`
  35. # KG settings
  36. batch_size = 256
  37. [database.graph_creation_settings]
  38. graph_entity_description_prompt = "graphrag_entity_description"
  39. entity_types = [] # if empty, all entities are extracted
  40. relation_types = [] # if empty, all relations are extracted
  41. fragment_merge_count = 1 # number of fragments to merge into a single extraction
  42. max_knowledge_relationships = 100
  43. max_description_input_length = 65536
  44. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for relationshipt extraction
  45. [database.graph_entity_deduplication_settings]
  46. graph_entity_deduplication_type = "by_name"
  47. graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
  48. max_description_input_length = 65536
  49. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication
  50. [database.graph_enrichment_settings]
  51. community_reports_prompt = "graphrag_community_reports"
  52. max_summary_input_length = 65536
  53. generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering
  54. leiden_params = {}
  55. [database.graph_search_settings]
  56. generation_config = { model = "openai/gpt-4o-mini" }
  57. [embedding]
  58. provider = "litellm"
  59. # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
  60. # RECOMMENDED - For advanced applications,
  61. # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
  62. base_model = "openai/text-embedding-3-small"
  63. base_dimension = 512
  64. # rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
  65. batch_size = 128
  66. add_title_as_prefix = false
  67. concurrent_request_limit = 256
  68. quantization_settings = { quantization_type = "FP32" }
  69. [file]
  70. provider = "postgres"
  71. [ingestion]
  72. provider = "r2r"
  73. chunking_strategy = "recursive"
  74. chunk_size = 1_024
  75. chunk_overlap = 512
  76. excluded_parsers = ["mp4"]
  77. # Ingestion-time document summary parameters
  78. # skip_document_summary = False
  79. # document_summary_system_prompt = 'default_system'
  80. # document_summary_task_prompt = 'default_summary'
  81. # chunks_for_document_summary = 128
  82. document_summary_model = "openai/gpt-4o-mini"
  83. vision_img_model = "openai/gpt-4o"
  84. vision_pdf_model = "openai/gpt-4o"
  85. [ingestion.chunk_enrichment_settings]
  86. enable_chunk_enrichment = false # disabled by default
  87. strategies = ["semantic", "neighborhood"]
  88. forward_chunks = 3
  89. backward_chunks = 3
  90. semantic_neighbors = 10
  91. semantic_similarity_threshold = 0.7
  92. generation_config = { model = "openai/gpt-4o-mini" }
  93. [ingestion.extra_parsers]
  94. pdf = "zerox"
  95. [logging]
  96. provider = "r2r"
  97. log_table = "logs"
  98. log_info_table = "log_info"
  99. [orchestration]
  100. provider = "simple"
  101. [prompt]
  102. provider = "r2r"
  103. [email]
  104. provider = "console_mock"