Gogs 1 месяц назад
Родитель
Commit
17f6ff22fb

+ 0 - 0
docker/.data/victoria-logs/flock.lock


BIN
docker/.docker-compose.yaml.swp


+ 103 - 0
docker/.env

@@ -0,0 +1,103 @@
+# R2R
+R2R_PORT=7272
+R2R_HOST=0.0.0.0
+R2R_LOG_LEVEL=INFO
+R2R_CONFIG_NAME=full
+R2R_CONFIG_PATH=
+R2R_PROJECT_NAME=r2r_default
+R2R_SECRET_KEY=
+
+# Postgres Configuration
+R2R_POSTGRES_USER=postgres
+# SECURITY: never commit real credentials — the previous password is now public;
+# rotate it and supply it at deploy time (secrets manager / CI variable).
+R2R_POSTGRES_PASSWORD=
+R2R_POSTGRES_HOST=postgres
+R2R_POSTGRES_PORT=5432
+R2R_POSTGRES_DBNAME=postgres
+R2R_POSTGRES_MAX_CONNECTIONS=102400
+R2R_POSTGRES_STATEMENT_CACHE_SIZE=1000
+
+# Hatchet
+HATCHET_CLIENT_TLS_STRATEGY=none
+
+# OpenAI
+# SECURITY: a live API key was committed here — revoke/rotate it immediately
+# and inject it at runtime instead of storing it in version control.
+OPENAI_API_KEY=
+OPENAI_API_BASE=https://onehub.cocorobo.cn/v1
+
+# Azure Foundry
+AZURE_FOUNDRY_API_ENDPOINT=
+AZURE_FOUNDRY_API_KEY=
+
+# XAI / GROK
+XAI_API_KEY=
+
+# Anthropic
+ANTHROPIC_API_KEY=
+
+# Azure
+AZURE_API_KEY=
+AZURE_API_BASE=
+AZURE_API_VERSION=
+
+# Google Vertex AI
+GOOGLE_APPLICATION_CREDENTIALS=
+VERTEX_PROJECT=
+VERTEX_LOCATION=
+
+# Google Gemini
+GEMINI_API_KEY=
+
+# AWS Bedrock
+# SECURITY: live AWS credentials were committed here — deactivate/rotate them
+# in IAM immediately and prefer an instance role or runtime secret injection.
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_REGION_NAME=cn-northwest-1
+
+# Groq
+GROQ_API_KEY=
+
+# Cohere
+COHERE_API_KEY=
+
+# Anyscale
+ANYSCALE_API_KEY=
+
+# Ollama
+OLLAMA_API_BASE=http://34.228.204.21:11434
+
+# LM Studio
+LM_STUDIO_API_BASE=http://host.docker.internal:1234
+LM_STUDIO_API_KEY=1234
+
+# Huggingface
+HUGGINGFACE_API_BASE=http://host.docker.internal:8080
+HUGGINGFACE_API_KEY=
+
+# Unstructured
+UNSTRUCTURED_API_KEY=
+UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
+UNSTRUCTURED_SERVICE_URL=http://unstructured:7275
+UNSTRUCTURED_NUM_WORKERS=10
+
+# Graphologic
+CLUSTERING_SERVICE_URL=http://graph_clustering:7276
+
+# OAuth Credentials
+GOOGLE_CLIENT_ID=
+GOOGLE_CLIENT_SECRET=
+GOOGLE_REDIRECT_URI=
+
+GITHUB_CLIENT_ID=
+GITHUB_CLIENT_SECRET=
+GITHUB_REDIRECT_URI=
+
+# Email
+MAILERSEND_API_KEY=
+SENDGRID_API_KEY=
+
+# Other
+SERPER_API_KEY=
+# (removed duplicate SENDGRID_API_KEY — it is already defined in the Email section above)
+R2R_SENTRY_DSN=
+R2R_SENTRY_ENVIRONMENT=
+R2R_SENTRY_TRACES_SAMPLE_RATE=
+R2R_SENTRY_PROFILES_SAMPLE_RATE=
+FIRECRAWL_API_KEY=


+ 68 - 0
docker/Dockerfile

@@ -0,0 +1,68 @@
+# syntax=docker/dockerfile:1
+FROM python:3.12-slim AS builder
+
+# python:3.12-slim is Debian bookworm: apt sources live in
+# /etc/apt/sources.list.d/debian.sources, so `sed -i /etc/apt/sources.list`
+# fails (file does not exist). Replace the sources outright with the Aliyun
+# mirror in a single layer instead.
+RUN rm -f /etc/apt/sources.list.d/debian.sources && \
+    printf '%s\n' \
+      "deb https://mirrors.aliyun.com/debian/ bookworm main non-free non-free-firmware" \
+      "deb https://mirrors.aliyun.com/debian/ bookworm-updates main non-free non-free-firmware" \
+      "deb https://mirrors.aliyun.com/debian/ bookworm-backports main non-free non-free-firmware" \
+      "deb https://mirrors.aliyun.com/debian-security bookworm-security main non-free non-free-firmware" \
+      > /etc/apt/sources.list
+
+# Build dependencies (compilers, BLAS, poppler) plus rustup from the USTC
+# mirror for Python packages that build Rust extensions from source.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    g++ \
+    gcc \
+    gfortran \
+    libffi-dev \
+    libopenblas-dev \
+    musl-dev \
+    poppler-utils \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* \
+    && curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://mirrors.ustc.edu.cn/rust-static/rustup/rustup-init.sh | sh -s -- -y
+#    upstream equivalent: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Make cargo/rustc visible to pip builds
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+# WORKDIR creates /app/py itself — no separate mkdir needed.
+WORKDIR /app/py
+COPY pyproject.toml ./
+# Single install layer: project extras plus the ASGI server stack.
+# (Previously gunicorn/uvicorn were installed twice in separate layers.)
+RUN pip install --no-cache-dir -e ".[core]" && \
+    pip install --no-cache-dir gunicorn uvicorn pydantic
+
+# ---------------------------------------------------------------------------
+# Final runtime image
+FROM python:3.12-slim
+
+# Same mirror replacement as the builder stage (bookworm uses debian.sources,
+# so sed on /etc/apt/sources.list would fail here too).
+RUN rm -f /etc/apt/sources.list.d/debian.sources && \
+    printf '%s\n' \
+      "deb https://mirrors.aliyun.com/debian/ bookworm main non-free non-free-firmware" \
+      "deb https://mirrors.aliyun.com/debian/ bookworm-updates main non-free non-free-firmware" \
+      "deb https://mirrors.aliyun.com/debian/ bookworm-backports main non-free non-free-firmware" \
+      "deb https://mirrors.aliyun.com/debian-security bookworm-security main non-free non-free-firmware" \
+      > /etc/apt/sources.list
+
+# Minimal runtime deps: curl for healthchecks, poppler for PDF parsing.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl poppler-utils \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Copy the fully built Python environment from the builder stage.
+COPY --from=builder /usr/local /usr/local
+
+WORKDIR /app
+
+# Copy the application source
+COPY . /app
+
+# Run as a non-root user (stable numeric UID for runAsNonRoot checks).
+RUN useradd --system --uid 10001 --create-home --home-dir /home/app app \
+    && chown -R app:app /app
+USER app
+
+# Port/host overridable at build time (ARG) and run time (ENV).
+ARG R2R_PORT=8000 R2R_HOST=0.0.0.0
+ENV R2R_PORT=$R2R_PORT R2R_HOST=$R2R_HOST
+EXPOSE $R2R_PORT
+
+# sh -c so $R2R_HOST/$R2R_PORT expand; exec makes uvicorn PID 1 so it
+# receives SIGTERM directly from `docker stop`.
+CMD ["sh", "-c", "exec uvicorn core.main.app_entry:app --host $R2R_HOST --port $R2R_PORT"]

+ 392 - 0
docker/compose.full.swarm.yaml

@@ -0,0 +1,392 @@
+volumes:
+  hatchet_certs:
+    name: ${VOLUME_HATCHET_CERTS:-hatchet_certs}
+  hatchet_config:
+    name: ${VOLUME_HATCHET_CONFIG:-hatchet_config}
+  hatchet_api_key:
+    name: ${VOLUME_HATCHET_API_KEY:-hatchet_api_key}
+  postgres_data:
+    name: ${VOLUME_POSTGRES_DATA:-postgres_data}
+  hatchet_rabbitmq_data:
+    name: ${VOLUME_HATCHET_RABBITMQ_DATA:-hatchet_rabbitmq_data}
+  hatchet_rabbitmq_conf:
+    name: ${VOLUME_HATCHET_RABBITMQ_CONF:-hatchet_rabbitmq_conf}
+  hatchet_postgres_data:
+    name: ${VOLUME_HATCHET_POSTGRES_DATA:-hatchet_postgres_data}
+
+services:
+  postgres:
+    image: pgvector/pgvector:pg16
+    environment:
+      - POSTGRES_USER=${R2R_POSTGRES_USER:-postgres}
+      - POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-postgres}
+      - POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
+      - POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
+      - POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
+      - PGPORT=${R2R_POSTGRES_PORT:-5432}
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "${R2R_POSTGRES_PORT:-5432}:${R2R_POSTGRES_PORT:-5432}"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${R2R_POSTGRES_USER:-postgres}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    command: >
+      postgres
+      -c max_connections=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-postgres:
+    image: postgres:latest
+    environment:
+      POSTGRES_DB: ${HATCHET_POSTGRES_DBNAME:-hatchet}
+      POSTGRES_USER: ${HATCHET_POSTGRES_USER:-hatchet_user}
+      POSTGRES_PASSWORD: ${HATCHET_POSTGRES_PASSWORD:-hatchet_password}
+    volumes:
+      - hatchet_postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${HATCHET_POSTGRES_USER:-hatchet_user} -d ${HATCHET_POSTGRES_DBNAME:-hatchet}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-rabbitmq:
+    image: "rabbitmq:3-management"
+    hostname: "hatchet-rabbitmq"
+    ports:
+      - "${R2R_RABBITMQ_PORT:-5673}:5672"
+      - "${R2R_RABBITMQ_MGMT_PORT:-15673}:15672"
+    environment:
+      RABBITMQ_DEFAULT_USER: "user"
+      RABBITMQ_DEFAULT_PASS: "password"
+    volumes:
+      - hatchet_rabbitmq_data:/var/lib/rabbitmq
+      - hatchet_rabbitmq_conf:/etc/rabbitmq/rabbitmq.conf
+    healthcheck:
+      test: ["CMD", "rabbitmqctl", "status"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-create-db:
+    image: postgres:latest
+    command: >
+      sh -c "
+        set -e
+        echo 'Waiting for PostgreSQL to be ready...'
+        while ! pg_isready -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user}; do
+          sleep 1
+        done
+        echo 'PostgreSQL is ready, checking if database exists...'
+        if ! PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} psql -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -lqt | grep -qw ${HATCHET_POSTGRES_DBNAME:-hatchet}; then
+          echo 'Database does not exist, creating it...'
+          PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} createdb -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -w ${HATCHET_POSTGRES_DBNAME:-hatchet}
+        else
+          echo 'Database already exists, skipping creation.'
+        fi
+      "
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-migration:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.53.15
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+    depends_on:
+      - hatchet-create-db
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-setup-config:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+
+      HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-134217728}"
+      HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-134217728}"
+
+      DATABASE_POSTGRES_PORT: "5432"
+      DATABASE_POSTGRES_HOST: hatchet-postgres
+      DATABASE_POSTGRES_USERNAME: "${HATCHET_POSTGRES_USER:-hatchet_user}"
+      DATABASE_POSTGRES_PASSWORD: "${HATCHET_POSTGRES_PASSWORD:-hatchet_password}"
+      HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}"
+
+      SERVER_TASKQUEUE_RABBITMQ_URL: amqp://user:password@hatchet-rabbitmq:5672/
+      SERVER_AUTH_COOKIE_DOMAIN: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
+      SERVER_URL: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
+      SERVER_AUTH_COOKIE_INSECURE: "t"
+      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
+      SERVER_GRPC_INSECURE: "t"
+      SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
+      SERVER_GRPC_MAX_MSG_SIZE: 134217728
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    depends_on:
+      - hatchet-migration
+      - hatchet-rabbitmq
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-engine:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.53.15
+    command: /hatchet/hatchet-engine --config /hatchet/config
+    depends_on:
+      - hatchet-setup-config
+    ports:
+      - "${R2R_HATCHET_ENGINE_PORT:-7077}:7077"
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+      SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
+      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
+      SERVER_GRPC_PORT: "7077"
+      SERVER_GRPC_INSECURE: "t"
+      SERVER_GRPC_MAX_MSG_SIZE: 134217728
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://localhost:8733/live"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-dashboard:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.53.15
+    command: sh ./entrypoint.sh --config /hatchet/config
+    depends_on:
+      - hatchet-setup-config
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    ports:
+      - "${R2R_HATCHET_DASHBOARD_PORT:-7274}:80"
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  setup-token:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: sh /scripts/setup-token.sh
+    volumes:
+      - ./scripts:/scripts
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+      - hatchet_api_key:/hatchet_api_key
+    depends_on:
+      - hatchet-setup-config
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  unstructured:
+    image: ${UNSTRUCTURED_IMAGE:-ragtoriches/unst-prod}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7275/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  graph_clustering:
+    image: ${GRAPH_CLUSTERING_IMAGE:-ragtoriches/cluster-prod}
+    ports:
+      - "${R2R_GRAPH_CLUSTERING_PORT:-7276}:7276"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7276/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  r2r:
+    image: sciphiai/r2r:latest
+    ports:
+      - "${R2R_PORT:-7272}:${R2R_PORT:-7272}"
+    environment:
+      - PYTHONUNBUFFERED=1
+      - R2R_PORT=${R2R_PORT:-7272}
+      - R2R_HOST=${R2R_HOST:-0.0.0.0}
+
+      # R2R
+      - R2R_LOG_LEVEL=${R2R_LOG_LEVEL:-INFO}
+      - R2R_LOG_CONSOLE_FORMATTER=${R2R_LOG_CONSOLE_FORMATTER:-json}
+      - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-}
+      - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-}
+      - R2R_PROJECT_NAME=${R2R_PROJECT_NAME:-r2r_default}
+      - R2R_SECRET_KEY=${R2R_SECRET_KEY:-}
+
+      # Postgres
+      - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-postgres}
+      - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-postgres}
+      - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
+      - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
+      - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
+      - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
+      - R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-100}
+
+      # OpenAI
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - OPENAI_API_BASE=${OPENAI_API_BASE:-}
+
+      # Azure Foundry
+      - AZURE_FOUNDRY_API_ENDPOINT=${AZURE_FOUNDRY_API_ENDPOINT:-}
+      - AZURE_FOUNDRY_API_KEY=${AZURE_FOUNDRY_API_KEY:-}
+
+      # XAI / GROK
+      - XAI_API_KEY=${XAI_API_KEY:-}
+
+      # Anthropic
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+
+      # Azure
+      - AZURE_API_KEY=${AZURE_API_KEY:-}
+      - AZURE_API_BASE=${AZURE_API_BASE:-}
+      - AZURE_API_VERSION=${AZURE_API_VERSION:-}
+
+      # Google Vertex AI
+      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
+      - VERTEX_PROJECT=${VERTEX_PROJECT:-}
+      - VERTEX_LOCATION=${VERTEX_LOCATION:-}
+
+      # Google Gemini
+      - GEMINI_API_KEY=${GEMINI_API_KEY:-}
+
+      # AWS Bedrock
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
+      - AWS_REGION_NAME=${AWS_REGION_NAME:-}
+
+      # Groq
+      - GROQ_API_KEY=${GROQ_API_KEY:-}
+
+      # Cohere
+      - COHERE_API_KEY=${COHERE_API_KEY:-}
+
+      # Anyscale
+      - ANYSCALE_API_KEY=${ANYSCALE_API_KEY:-}
+
+      # Ollama
+      - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434}
+
+      # LM Studio
+      - LM_STUDIO_API_BASE=${LM_STUDIO_API_BASE:-http://host.docker.internal:1234}
+      - LM_STUDIO_API_KEY=${LM_STUDIO_API_KEY:-1234}
+
+      # Huggingface
+      - HUGGINGFACE_API_BASE=${HUGGINGFACE_API_BASE:-http://host.docker.internal:8080}
+      - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY}
+
+      # Unstructured
+      - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
+      - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general}
+      - UNSTRUCTURED_SERVICE_URL=${UNSTRUCTURED_SERVICE_URL:-http://unstructured:7275}
+      - UNSTRUCTURED_NUM_WORKERS=${UNSTRUCTURED_NUM_WORKERS:-10}
+
+      # Hatchet
+      - HATCHET_CLIENT_TLS_STRATEGY=none
+      - HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-134217728}
+      - HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-134217728}
+
+      # Graphologic
+      - CLUSTERING_SERVICE_URL=http://graph_clustering:7276
+
+      # OAuth Credentials
+      - GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID}
+      - GOOGLE_CLIENT_SECRET=${GOOGLE_CLIENT_SECRET}
+      - GOOGLE_REDIRECT_URI=${GOOGLE_REDIRECT_URI}
+
+      - GITHUB_CLIENT_ID=${GITHUB_CLIENT_ID}
+      - GITHUB_CLIENT_SECRET=${GITHUB_CLIENT_SECRET}
+      - GITHUB_REDIRECT_URI=${GITHUB_REDIRECT_URI}
+
+      # Other
+      - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY}
+      - SERPER_API_KEY=${SERPER_API_KEY}
+      - SENDGRID_API_KEY=${SENDGRID_API_KEY}
+      - R2R_SENTRY_DSN=${R2R_SENTRY_DSN}
+      - R2R_SENTRY_ENVIRONMENT=${R2R_SENTRY_ENVIRONMENT}
+      - R2R_SENTRY_TRACES_SAMPLE_RATE=${R2R_SENTRY_TRACES_SAMPLE_RATE}
+      - R2R_SENTRY_PROFILES_SAMPLE_RATE=${R2R_SENTRY_PROFILES_SAMPLE_RATE}
+
+    command: >
+      sh -c '
+        if [ -z "$${HATCHET_CLIENT_TOKEN}" ]; then
+          export HATCHET_CLIENT_TOKEN=$$(cat /hatchet_api_key/api_key.txt)
+        fi
+        exec uvicorn core.main.app_entry:app --host $${R2R_HOST} --port $${R2R_PORT}
+      '
+    volumes:
+      # WARNING: when R2R_CONFIG_PATH is unset this bind-mounts the HOST ROOT (/)
+      # into the container — set R2R_CONFIG_PATH explicitly or remove this mount.
+      - ${R2R_CONFIG_PATH:-/}:${R2R_CONFIG_PATH:-/app/config}
+      - hatchet_api_key:/hatchet_api_key:ro
+    extra_hosts:
+      - host.docker.internal:host-gateway
+    depends_on:
+      - setup-token
+      - unstructured
+      - graph_clustering
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v3/health"]
+      interval: 6s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    deploy:
+      replicas: ${R2R_REPLICAS:-3}
+      restart_policy:
+        condition: on-failure
+      update_config:
+        parallelism: 1
+        delay: 30s
+        order: start-first
+      rollback_config:
+        parallelism: 1
+        delay: 30s
+
+  r2r-dashboard:
+    image: sciphiai/r2r-dashboard:1.0.3
+    environment:
+      - NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272}
+      - NEXT_PUBLIC_HATCHET_DASHBOARD_URL=${HATCHET_DASHBOARD_URL:-http://localhost:${R2R_HATCHET_DASHBOARD_PORT:-7274}}
+    ports:
+      - "${R2R_DASHBOARD_PORT:-7273}:3000"
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure

+ 215 - 0
docker/compose.full.yaml

@@ -0,0 +1,215 @@
+volumes:
+  hatchet_certs:
+    name: hatchet_certs
+  hatchet_config:
+    name: hatchet_config
+  hatchet_api_key:
+    name: hatchet_api_key
+  hatchet_rabbitmq_data:
+    name: hatchet_rabbitmq_data
+  hatchet_rabbitmq_conf:
+    name: hatchet_rabbitmq_conf
+  hatchet_postgres_data:
+    name: hatchet_postgres_data
+  minio_data:
+    name: minio_data
+  postgres_data:
+    name: postgres_data
+
+services:
+  postgres:
+    image: pgvector/pgvector:pg16
+    profiles: [postgres]
+    env_file:
+      - ./env/postgres.env
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    command: >
+      postgres
+      -c max_connections=1024
+
+  minio:
+    image: minio/minio
+    profiles: [minio]
+    env_file:
+      - ./env/minio.env
+    volumes:
+      - minio_data:/data
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    command: server /data --console-address ":9001"
+
+  hatchet-postgres:
+    image: postgres:latest
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U hatchet_user -d hatchet"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+
+  hatchet-rabbitmq:
+    image: "rabbitmq:3-management"
+    hostname: "hatchet-rabbitmq"
+    ports:
+      - "5673:5672"
+      - "15673:15672"
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_rabbitmq_data:/var/lib/rabbitmq
+      - hatchet_rabbitmq_conf:/etc/rabbitmq/rabbitmq.conf
+    healthcheck:
+      test: ["CMD", "rabbitmqctl", "status"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+
+  hatchet-create-db:
+    image: postgres:latest
+    command: sh /scripts/create-hatchet-db.sh
+    volumes:
+      - ./scripts:/scripts
+    env_file:
+      - ./env/hatchet.env
+
+  hatchet-migration:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.53.15
+    env_file:
+      - ./env/hatchet.env
+    depends_on:
+      hatchet-create-db:
+        condition: service_completed_successfully
+
+  hatchet-setup-config:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    depends_on:
+      hatchet-migration:
+        condition: service_completed_successfully
+      hatchet-rabbitmq:
+        condition: service_healthy
+
+  hatchet-engine:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.53.15
+    command: /hatchet/hatchet-engine --config /hatchet/config
+    restart: on-failure
+    depends_on:
+      hatchet-setup-config:
+        condition: service_completed_successfully
+    ports:
+      - "7077:7077"
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://localhost:8733/live"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  hatchet-dashboard:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.53.15
+    command: sh ./entrypoint.sh --config /hatchet/config
+    restart: on-failure
+    depends_on:
+      hatchet-setup-config:
+        condition: service_completed_successfully
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    ports:
+      - "7274:80"
+
+  setup-token:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: sh /scripts/setup-token.sh
+    volumes:
+      - ./scripts:/scripts
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+      - hatchet_api_key:/hatchet_api_key
+    depends_on:
+      hatchet-setup-config:
+        condition: service_completed_successfully
+
+  unstructured:
+    image: ragtoriches/unst-prod
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7275/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  graph_clustering:
+    image: ragtoriches/cluster-prod
+    ports:
+      - "7276:7276"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7276/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  r2r:
+    image: sciphiai/r2r:latest
+    ports:
+      - "7272:7272"
+    env_file:
+      - ./env/r2r-full.env
+    command: sh /scripts/start-r2r.sh
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7272/v3/health"]
+      interval: 6s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    volumes:
+      - ./user_configs:/app/user_configs
+      - ./user_tools:/app/user_tools
+      - hatchet_api_key:/hatchet_api_key:ro
+      - ./scripts:/scripts
+    extra_hosts:
+      - host.docker.internal:host-gateway
+    depends_on:
+      setup-token:
+        condition: service_completed_successfully
+      unstructured:
+        condition: service_healthy
+      graph_clustering:
+        condition: service_healthy
+
+  r2r-dashboard:
+    image: sciphiai/r2r-dashboard:1.0.3
+    env_file:
+      - ./env/r2r-dashboard.env
+    ports:
+      - "7273:3000"

+ 78 - 0
docker/compose.yaml

@@ -0,0 +1,78 @@
+volumes:
+  postgres_data:
+    name: postgres_data
+  minio_data:
+    name: minio_data
+
+services:
+  postgres:
+    image: pgvector/pgvector:pg16
+    profiles: [postgres]
+    env_file:
+      - ./env/postgres.env
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    command: >
+      postgres
+      -c max_connections=1024
+
+  minio:
+    image: minio/minio
+    profiles: [minio]
+    env_file:
+      - ./env/minio.env
+    volumes:
+      - minio_data:/data
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    command: server /data --console-address ":9001"
+
+  graph_clustering:
+    image: ragtoriches/cluster-prod
+    ports:
+      - "7276:7276"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7276/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  r2r:
+    image: sciphiai/r2r:latest
+    ports:
+      - "7272:7272"
+    env_file:
+      - ./env/r2r.env
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7272/v3/health"]
+      interval: 6s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    volumes:
+      - ./user_configs:/app/user_configs
+      - ./user_tools:/app/user_tools
+    extra_hosts:
+      - host.docker.internal:host-gateway
+
+  r2r-dashboard:
+    image: sciphiai/r2r-dashboard:1.0.3
+    env_file:
+      - ./env/r2r-dashboard.env
+    ports:
+      - "7273:3000"

+ 481 - 0
docker/docker-compose.yaml

@@ -0,0 +1,481 @@
+networks:
+  r2r-network:
+    external: true
+    attachable: true
+    labels:
+      - "com.docker.compose.recreate=always"
+    #name: r2r_r2r-network
+
+volumes:
+  hatchet_certs:
+    name: ${VOLUME_HATCHET_CERTS:-hatchet_certs}
+  hatchet_config:
+    name: ${VOLUME_HATCHET_CONFIG:-hatchet_config}
+  hatchet_api_key:
+    name: ${VOLUME_HATCHET_API_KEY:-hatchet_api_key}
+  postgres_data:
+    name: ${VOLUME_POSTGRES_DATA:-postgres_data}
+  hatchet_rabbitmq_data:
+    name: ${VOLUME_HATCHET_RABBITMQ_DATA:-hatchet_rabbitmq_data}
+  hatchet_rabbitmq_conf:
+    name: ${VOLUME_HATCHET_RABBITMQ_CONF:-hatchet_rabbitmq_conf}
+  hatchet_postgres_data:
+    name: ${VOLUME_HATCHET_POSTGRES_DATA:-hatchet_postgres_data}
+
+services:
+  postgres:
+    image: pgvector/pgvector:pg16
+    shm_size: 1gb
+    environment:
+      - POSTGRES_USER=${R2R_POSTGRES_USER:-postgres}
+      - POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-cocorobo-123}
+      - POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
+      - POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
+      - POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-102400}
+      - PGPORT=${R2R_POSTGRES_PORT:-5432}
+    volumes:
+      - ./postgres_data:/var/lib/postgresql/data
+    networks:
+      - r2r-network
+    ports:
+      - "${R2R_POSTGRES_PORT:-5432}:${R2R_POSTGRES_PORT:-5432}"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${R2R_POSTGRES_USER:-postgres}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    command: >
+      postgres
+      -c max_connections=${R2R_POSTGRES_MAX_CONNECTIONS:-102400}
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-postgres:
+    image: postgres:latest
+    environment:
+      POSTGRES_DB: ${HATCHET_POSTGRES_DBNAME:-hatchet}
+      POSTGRES_USER: ${HATCHET_POSTGRES_USER:-hatchet_user}
+      POSTGRES_PASSWORD: ${HATCHET_POSTGRES_PASSWORD:-hatchet_password}
+    volumes:
+      - ./hatchet_postgres_data:/var/lib/postgresql/data
+    networks:
+      - r2r-network
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${HATCHET_POSTGRES_USER:-hatchet_user} -d ${HATCHET_POSTGRES_DBNAME:-hatchet}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-rabbitmq:
+    image: "rabbitmq:3-management"
+    hostname: "hatchet-rabbitmq"
+    ports:
+      - "${R2R_RABBITMQ_PORT:-5673}:5672"
+      - "${R2R_RABBITMQ_MGMT_PORT:-15673}:15672"
+    environment:
+      RABBITMQ_DEFAULT_USER: "user"
+      RABBITMQ_DEFAULT_PASS: "password"
+    volumes:
+      - hatchet_rabbitmq_data:/var/lib/rabbitmq
+      - hatchet_rabbitmq_conf:/etc/rabbitmq/rabbitmq.conf
+    healthcheck:
+      test: ["CMD", "rabbitmqctl", "status"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+    networks:
+      - r2r-network
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-create-db:
+    image: postgres:latest
+    command: >
+      sh -c "
+        set -e
+        echo 'Waiting for PostgreSQL to be ready...'
+        while ! pg_isready -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user}; do
+          sleep 1
+        done
+        echo 'PostgreSQL is ready, checking if database exists...'
+        if ! PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} psql -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -lqt | grep -qw ${HATCHET_POSTGRES_DBNAME:-hatchet}; then
+          echo 'Database does not exist, creating it...'
+          PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} createdb -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -w ${HATCHET_POSTGRES_DBNAME:-hatchet}
+        else
+          echo 'Database already exists, skipping creation.'
+        fi
+      "
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+    networks:
+      - r2r-network
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-migration:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.53.15
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+    depends_on:
+      - hatchet-create-db
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+    networks:
+      - r2r-network
+
+  hatchet-setup-config:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+
+      HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-134217728}"
+      HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-134217728}"
+
+      DATABASE_POSTGRES_PORT: "5432"
+      DATABASE_POSTGRES_HOST: hatchet-postgres
+      DATABASE_POSTGRES_USERNAME: "${HATCHET_POSTGRES_USER:-hatchet_user}"
+      DATABASE_POSTGRES_PASSWORD: "${HATCHET_POSTGRES_PASSWORD:-hatchet_password}"
+      HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}"
+
+      SERVER_TASKQUEUE_RABBITMQ_URL: amqp://user:password@hatchet-rabbitmq:5672/
+      SERVER_AUTH_COOKIE_DOMAIN: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
+      SERVER_URL: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
+      SERVER_AUTH_COOKIE_INSECURE: "t"
+      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
+      SERVER_GRPC_INSECURE: "t"
+      SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
+      SERVER_GRPC_MAX_MSG_SIZE: 134217728
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    depends_on:
+      - hatchet-migration
+      - hatchet-rabbitmq
+    networks:
+      - r2r-network
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  hatchet-engine:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.53.15
+    command: /hatchet/hatchet-engine --config /hatchet/config
+    depends_on:
+      - hatchet-setup-config
+    ports:
+      - "${R2R_HATCHET_ENGINE_PORT:-7077}:7077"
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+      SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
+      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
+      SERVER_GRPC_PORT: "7077"
+      SERVER_GRPC_INSECURE: "t"
+      SERVER_GRPC_MAX_MSG_SIZE: 134217728
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://localhost:8733/live"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+    networks:
+      - r2r-network
+
+  hatchet-dashboard:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.53.15
+    command: sh ./entrypoint.sh --config /hatchet/config
+    depends_on:
+      - hatchet-setup-config
+    environment:
+      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    networks:
+      - r2r-network
+    ports:
+      - "${R2R_HATCHET_DASHBOARD_PORT:-7274}:80"
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  setup-token:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: sh /scripts/setup-token.sh
+    volumes:
+      - ./scripts:/scripts
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+      - hatchet_api_key:/hatchet_api_key
+    depends_on:
+      - hatchet-setup-config
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+    networks:
+      - r2r-network
+
+  unstructured:
+    image: ${UNSTRUCTURED_IMAGE:-ragtoriches/unst-prod}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7275/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+    networks:
+      - r2r-network
+
+  graph_clustering:
+    image: ${GRAPH_CLUSTERING_IMAGE:-ragtoriches/cluster-prod}
+    ports:
+      - "${R2R_GRAPH_CLUSTERING_PORT:-7276}:7276"
+    networks:
+      - r2r-network
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7276/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+
+  r2r:
+    image: sciphiai/r2r:latest
+    ports:
+      - "${R2R_PORT:-7272}:${R2R_PORT:-7272}"
+
+    environment:
+      - PYTHONUNBUFFERED=1
+      - R2R_PORT=${R2R_PORT:-7272}
+      - R2R_HOST=${R2R_HOST:-0.0.0.0}
+
+      # R2R
+      - R2R_LOG_LEVEL=${R2R_LOG_LEVEL:-INFO}
+      - R2R_LOG_CONSOLE_FORMATTER=${R2R_LOG_CONSOLE_FORMATTER:-json}
+      - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-}
+      - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-}
+      - R2R_PROJECT_NAME=${R2R_PROJECT_NAME:-r2r_default}
+      - R2R_SECRET_KEY=${R2R_SECRET_KEY:-}
+
+      # Postgres
+      - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-postgres}
+      - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-cocorobo-123}
+      - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
+      - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
+      - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
+      - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-102400}
+      - R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-1000}
+
+      # OpenAI
+      #- OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      #- OPENAI_API_BASE=${OPENAI_API_BASE:-}
+
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-sk-j9Uwupu0NPZtdDS_IfEZlRWpX1JgFyZFLZProkesy2QbtqMs16pDnylAozU}
+      - OPENAI_API_BASE=${OPENAI_API_BASE:-http://172.16.12.13:3000/v1}
+
+      # Azure Foundry
+      - AZURE_FOUNDRY_API_ENDPOINT=${AZURE_FOUNDRY_API_ENDPOINT:-}
+      - AZURE_FOUNDRY_API_KEY=${AZURE_FOUNDRY_API_KEY:-}
+
+      # XAI / GROK
+      - XAI_API_KEY=${XAI_API_KEY:-}
+
+      # Anthropic
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+
+      # Azure
+      - AZURE_API_KEY=${AZURE_API_KEY:-}
+      - AZURE_API_BASE=${AZURE_API_BASE:-}
+      - AZURE_API_VERSION=${AZURE_API_VERSION:-}
+
+      # Google Vertex AI
+      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
+      - VERTEX_PROJECT=${VERTEX_PROJECT:-}
+      - VERTEX_LOCATION=${VERTEX_LOCATION:-}
+
+      # Google Gemini
+      - GEMINI_API_KEY=${GEMINI_API_KEY:-}
+
+      # AWS Bedrock
+      #- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
+      #- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
+      #- AWS_REGION_NAME=${AWS_REGION_NAME:-}
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-AKIATLPEDU37ZUJYM7GG}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-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}
+      - AWS_REGION_NAME=${AWS_REGION_NAME:-cn-north-1}
+
+      # Groq
+      - GROQ_API_KEY=${GROQ_API_KEY:-}
+
+      # Cohere
+      - COHERE_API_KEY=${COHERE_API_KEY:-}
+
+      # Anyscale
+      - ANYSCALE_API_KEY=${ANYSCALE_API_KEY:-}
+
+      # Ollama
+      #- OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434}
+      - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://34.228.204.21:11434}
+
+      # LM Studio
+      - LM_STUDIO_API_BASE=${LM_STUDIO_API_BASE:-}
+      - LM_STUDIO_API_KEY=${LM_STUDIO_API_KEY:-1234}
+
+      # Huggingface
+      - HUGGINGFACE_API_BASE=${HUGGINGFACE_API_BASE:-}
+      - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY}
+
+      # Unstructured
+      - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-cJR72sMAYxCtT6Pmd2PW3KmrjNcKp5}
+      - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general}
+      - UNSTRUCTURED_SERVICE_URL=${UNSTRUCTURED_SERVICE_URL:-http://unstructured:7275}
+      - UNSTRUCTURED_NUM_WORKERS=${UNSTRUCTURED_NUM_WORKERS:-10}
+
+      # Hatchet
+      - HATCHET_CLIENT_TLS_STRATEGY=none
+      - HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-134217728}
+      - HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-134217728}
+
+      # Graphologic
+      - CLUSTERING_SERVICE_URL=http://graph_clustering:7276
+
+      # OAuth Credentials
+      - GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID}
+      - GOOGLE_CLIENT_SECRET=${GOOGLE_CLIENT_SECRET}
+      - GOOGLE_REDIRECT_URI=${GOOGLE_REDIRECT_URI}
+
+      - GITHUB_CLIENT_ID=${GITHUB_CLIENT_ID}
+      - GITHUB_CLIENT_SECRET=${GITHUB_CLIENT_SECRET}
+      - GITHUB_REDIRECT_URI=${GITHUB_REDIRECT_URI}
+
+      # Other
+      - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY}
+      - SERPER_API_KEY=${SERPER_API_KEY}
+      - SENDGRID_API_KEY=${SENDGRID_API_KEY}
+      - R2R_SENTRY_DSN=${R2R_SENTRY_DSN}
+      - R2R_SENTRY_ENVIRONMENT=${R2R_SENTRY_ENVIRONMENT}
+      - R2R_SENTRY_TRACES_SAMPLE_RATE=${R2R_SENTRY_TRACES_SAMPLE_RATE}
+      - R2R_SENTRY_PROFILES_SAMPLE_RATE=${R2R_SENTRY_PROFILES_SAMPLE_RATE}
+
+    command: >
+      sh -c '
+        if [ -z "$${HATCHET_CLIENT_TOKEN}" ]; then
+          export HATCHET_CLIENT_TOKEN=$$(cat /hatchet_api_key/api_key.txt)
+        fi
+        exec uvicorn core.main.app_entry:app --host $${R2R_HOST} --port $${R2R_PORT}
+      '
+    env_file:
+      - ./env/r2r-full.env
+    networks:
+      - r2r-network
+    volumes:
+      - ./user_configs:/app/user_configs
+      - ./scripts:/scripts
+        #- ${R2R_CONFIG_PATH:-/}:${R2R_CONFIG_PATH:-/app/config}
+      - hatchet_api_key:/hatchet_api_key:ro
+    extra_hosts:
+      - host.docker.internal:host-gateway
+    depends_on:
+      - setup-token
+      - unstructured
+      - graph_clustering
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v3/health"]
+      interval: 6s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    build:
+      context: ../py/
+      dockerfile: Dockerfile
+    deploy:
+      replicas: ${R2R_REPLICAS:-1}
+      restart_policy:
+        condition: on-failure
+      update_config:
+        parallelism: 1
+        delay: 30s
+        order: start-first
+      rollback_config:
+        parallelism: 1
+        delay: 30s
+        #logging:
+        #driver: fluentd
+        #options:
+        #fluentd-address: host.docker.internal:24224
+        #fluentd-sub-second-precision: "true"
+        #tag: backend
+
+  r2r-dashboard:
+    image: sciphiai/r2r-dashboard:1.0.2
+    environment:
+      - NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272}
+      - NEXT_PUBLIC_HATCHET_DASHBOARD_URL=${HATCHET_DASHBOARD_URL:-http://localhost:${R2R_HATCHET_DASHBOARD_PORT:-7274}}
+    ports:
+      - "${R2R_DASHBOARD_PORT:-7273}:3000"
+    deploy:
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+    networks:
+      - r2r-network
+
+  fluent-bit:
+    image: fluent/fluent-bit:latest
+    volumes:
+    - ./fluent-bit:/fluent-bit/etc:ro
+    ports:
+    - "24224:24224"
+    depends_on:
+    - victoria-logs
+    networks:
+      - r2r-network
+
+  grafana:
+    image: grafana/grafana:latest
+    ports:
+    - "3001:3000"
+    env_file:
+    - .env
+    volumes:
+    - ./.data/grafana:/var/lib/grafana
+    networks:
+      - r2r-network
+
+
+  victoria-logs:
+    image: victoriametrics/victoria-logs:v1.10.1-victorialogs
+    ports:
+    - "9428:9428"
+    volumes:
+    - ./.data/victoria-logs:/data
+    command: -storageDataPath=/data -retentionPeriod=60d
+    networks:
+      - r2r-network

+ 26 - 0
docker/env/hatchet.env

@@ -0,0 +1,26 @@
+DATABASE_URL="postgres://hatchet_user:hatchet_password@hatchet-postgres:5432/hatchet?sslmode=disable"
+
+HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=134217728
+HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=134217728
+
+DATABASE_POSTGRES_PORT=5432
+DATABASE_POSTGRES_HOST=hatchet-postgres
+DATABASE_POSTGRES_USERNAME=hatchet_user
+DATABASE_POSTGRES_PASSWORD=hatchet_password
+HATCHET_DATABASE_POSTGRES_DB_NAME=hatchet
+POSTGRES_DB=hatchet
+POSTGRES_USER=hatchet_user
+POSTGRES_PASSWORD=hatchet_password
+
+SERVER_TASKQUEUE_RABBITMQ_URL=amqp://user:password@hatchet-rabbitmq:5672/
+SERVER_AUTH_COOKIE_DOMAIN=http://host.docker.internal:7274
+SERVER_URL=http://host.docker.internal:7274
+SERVER_AUTH_COOKIE_INSECURE=t
+SERVER_GRPC_BIND_ADDRESS=0.0.0.0
+SERVER_GRPC_INSECURE=t
+SERVER_GRPC_BROADCAST_ADDRESS=hatchet-engine:7077
+SERVER_GRPC_MAX_MSG_SIZE=134217728
+SERVER_GRPC_PORT="7077"
+
+RABBITMQ_DEFAULT_USER=user
+RABBITMQ_DEFAULT_PASS=password

+ 2 - 0
docker/env/minio.env

@@ -0,0 +1,2 @@
+MINIO_ROOT_USER=minioadmin
+MINIO_ROOT_PASSWORD=minioadmin

+ 6 - 0
docker/env/postgres.env

@@ -0,0 +1,6 @@
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=postgres
+POSTGRES_HOST=postgres
+POSTGRES_PORT=5432
+POSTGRES_MAX_CONNECTIONS=1024
+PGPORT=5432

+ 4 - 0
docker/env/r2r-dashboard.env

@@ -0,0 +1,4 @@
+NEXT_PUBLIC_R2R_DEPLOYMENT_URL=http://localhost:7272
+NEXT_PUBLIC_HATCHET_DASHBOARD_URL=http://localhost:7274
+NEXT_PUBLIC_R2R_DEFAULT_EMAIL="admin@example.com"
+NEXT_PUBLIC_R2R_DEFAULT_PASSWORD="change_me_immediately"

+ 104 - 0
docker/env/r2r-full-old.env

@@ -0,0 +1,104 @@
+# R2R
+R2R_PORT=7272
+R2R_HOST=0.0.0.0
+R2R_LOG_LEVEL=INFO
+R2R_CONFIG_NAME=
+R2R_CONFIG_PATH=/app/user_configs/r2r.toml
+R2R_PROJECT_NAME=r2r_default
+R2R_SECRET_KEY=
+
+# Postgres Configuration
+R2R_POSTGRES_USER=postgres
+R2R_POSTGRES_PASSWORD=cocorobo-123
+R2R_POSTGRES_HOST=postgres
+R2R_POSTGRES_PORT=5432
+R2R_POSTGRES_DBNAME=postgres
+R2R_POSTGRES_MAX_CONNECTIONS=102400
+R2R_POSTGRES_STATEMENT_CACHE_SIZE=10000
+
+# Hatchet
+HATCHET_CLIENT_TLS_STRATEGY=none
+
+# OpenAI
+OPENAI_API_KEY=sk-j9Uwupu0NPZtdDS_IfEZlRWpX1JgFyZFLZProkesy2QbtqMs16pDnylAozU
+OPENAI_API_BASE=https://onehub.cocorobo.cn/v1
+
+# Azure Foundry
+AZURE_FOUNDRY_API_ENDPOINT=
+AZURE_FOUNDRY_API_KEY=
+
+# XAI / GROK
+XAI_API_KEY=
+
+# Anthropic
+ANTHROPIC_API_KEY=
+
+# Azure
+AZURE_API_KEY=
+AZURE_API_BASE=
+AZURE_API_VERSION=
+
+# Google Vertex AI
+GOOGLE_APPLICATION_CREDENTIALS=
+VERTEX_PROJECT=
+VERTEX_LOCATION=
+
+# Google Gemini
+GEMINI_API_KEY=
+
+# AWS Bedrock
+AWS_ACCESS_KEY_ID=AKIATLPEDU37QV5CHLMH
+AWS_SECRET_ACCESS_KEY=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
+AWS_REGION_NAME=cn-northwest-1
+
+# Groq
+GROQ_API_KEY=
+
+# Cohere
+COHERE_API_KEY=
+
+# Anyscale
+ANYSCALE_API_KEY=
+
+# Ollama
+OLLAMA_API_BASE=http://34.228.204.21:11434
+
+# LM Studio
+LM_STUDIO_API_BASE=
+LM_STUDIO_API_KEY=1234
+
+# Huggingface
+HUGGINGFACE_API_BASE=
+HUGGINGFACE_API_KEY=
+
+# Unstructured
+UNSTRUCTURED_API_KEY=
+UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
+UNSTRUCTURED_SERVICE_URL=http://unstructured:7275
+UNSTRUCTURED_NUM_WORKERS=10
+
+# Graphologic
+CLUSTERING_SERVICE_URL=http://graph_clustering:7276
+
+# OAuth Credentials
+GOOGLE_CLIENT_ID=
+GOOGLE_CLIENT_SECRET=
+GOOGLE_REDIRECT_URI=
+
+GITHUB_CLIENT_ID=
+GITHUB_CLIENT_SECRET=
+GITHUB_REDIRECT_URI=
+
+# Email
+MAILERSEND_API_KEY=
+SENDGRID_API_KEY=
+
+# Websearch
+FIRECRAWL_API_KEY=
+SERPER_API_KEY=
+
+# Sentry Tracing
+R2R_SENTRY_DSN=
+R2R_SENTRY_ENVIRONMENT=
+R2R_SENTRY_TRACES_SAMPLE_RATE=
+R2R_SENTRY_PROFILES_SAMPLE_RATE=

+ 110 - 0
docker/env/r2r-full.env

@@ -0,0 +1,110 @@
+# R2R
+R2R_PORT=7272
+R2R_HOST=0.0.0.0
+R2R_LOG_LEVEL=INFO
+R2R_CONFIG_NAME=
+R2R_CONFIG_PATH=/app/user_configs/r2r.toml
+R2R_PROJECT_NAME=r2r_default
+R2R_SECRET_KEY=
+R2R_USER_TOOLS_PATH=/app/user_tools
+R2R_LOG_FORMAT=
+
+# Postgres Configuration
+R2R_POSTGRES_USER=postgres
+R2R_POSTGRES_PASSWORD=cocorobo-123
+R2R_POSTGRES_HOST=postgres
+R2R_POSTGRES_PORT=5432
+R2R_POSTGRES_DBNAME=postgres
+R2R_POSTGRES_MAX_CONNECTIONS=10240
+R2R_POSTGRES_STATEMENT_CACHE_SIZE=100
+
+# Hatchet
+HATCHET_CLIENT_TLS_STRATEGY=none
+
+# OpenAI
+OPENAI_API_KEY=sk-j9Uwupu0NPZtdDS_IfEZlRWpX1JgFyZFLZProkesy2QbtqMs16pDnylAozU
+OPENAI_API_BASE=http://172.16.12.13:3000/v1
+
+# Azure Foundry
+AZURE_FOUNDRY_API_ENDPOINT=
+AZURE_FOUNDRY_API_KEY=
+
+# XAI / GROK
+XAI_API_KEY=
+
+# Anthropic
+ANTHROPIC_API_KEY=
+
+# Azure
+AZURE_API_KEY=
+AZURE_API_BASE=
+AZURE_API_VERSION=
+
+# Google Vertex AI
+GOOGLE_APPLICATION_CREDENTIALS=
+VERTEX_PROJECT=
+VERTEX_LOCATION=
+
+# Google Gemini
+GEMINI_API_KEY=
+
+# Mistral
+MISTRAL_API_KEY=vihwzM2StaxXxicsb1alLN7Y4oAM0EbD
+
+# AWS Bedrock
+AWS_ACCESS_KEY_ID=AKIATLPEDU37QV5CHLMH
+AWS_SECRET_ACCESS_KEY=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
+AWS_REGION_NAME=cn-northwest-1
+
+# Groq
+GROQ_API_KEY=
+
+# Cohere
+COHERE_API_KEY=
+
+# Anyscale
+ANYSCALE_API_KEY=
+
+# Ollama
+OLLAMA_API_BASE=http://34.228.204.21:11434
+
+# LM Studio
+LM_STUDIO_API_BASE=
+LM_STUDIO_API_KEY=1234
+
+# Huggingface
+HUGGINGFACE_API_BASE=
+HUGGINGFACE_API_KEY=
+
+# Unstructured
+UNSTRUCTURED_API_KEY=cJR72sMAYxCtT6Pmd2PW3KmrjNcKp5
+UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
+UNSTRUCTURED_SERVICE_URL=http://unstructured:7275
+UNSTRUCTURED_NUM_WORKERS=10
+
+# Graphologic
+CLUSTERING_SERVICE_URL=http://graph_clustering:7276
+
+# OAuth Credentials
+GOOGLE_CLIENT_ID=
+GOOGLE_CLIENT_SECRET=
+GOOGLE_REDIRECT_URI=
+
+GITHUB_CLIENT_ID=
+GITHUB_CLIENT_SECRET=
+GITHUB_REDIRECT_URI=
+
+# Email
+MAILERSEND_API_KEY=
+SENDGRID_API_KEY=
+
+# Websearch
+FIRECRAWL_API_KEY=
+SERPER_API_KEY=
+TAVILY_API_KEY=
+
+# Sentry Tracing
+R2R_SENTRY_DSN=
+R2R_SENTRY_ENVIRONMENT=
+R2R_SENTRY_TRACES_SAMPLE_RATE=
+R2R_SENTRY_PROFILES_SAMPLE_RATE=

+ 110 - 0
docker/env/r2r.env

@@ -0,0 +1,110 @@
+# R2R
+R2R_PORT=7272
+R2R_HOST=0.0.0.0
+R2R_LOG_LEVEL=INFO
+R2R_CONFIG_NAME=
+R2R_CONFIG_PATH=
+R2R_PROJECT_NAME=r2r_default
+R2R_SECRET_KEY=
+R2R_USER_TOOLS_PATH=/app/user_tools
+R2R_LOG_FORMAT=
+
+# Postgres Configuration
+R2R_POSTGRES_USER=postgres
+R2R_POSTGRES_PASSWORD=postgres
+R2R_POSTGRES_HOST=postgres
+R2R_POSTGRES_PORT=5432
+R2R_POSTGRES_DBNAME=postgres
+R2R_POSTGRES_MAX_CONNECTIONS=1024
+R2R_POSTGRES_STATEMENT_CACHE_SIZE=100
+
+# Hatchet
+HATCHET_CLIENT_TLS_STRATEGY=none
+
+# OpenAI
+OPENAI_API_KEY=
+OPENAI_API_BASE=
+
+# Azure Foundry
+AZURE_FOUNDRY_API_ENDPOINT=
+AZURE_FOUNDRY_API_KEY=
+
+# XAI / GROK
+XAI_API_KEY=
+
+# Anthropic
+ANTHROPIC_API_KEY=
+
+# Azure
+AZURE_API_KEY=
+AZURE_API_BASE=
+AZURE_API_VERSION=
+
+# Google Vertex AI
+GOOGLE_APPLICATION_CREDENTIALS=
+VERTEX_PROJECT=
+VERTEX_LOCATION=
+
+# Google Gemini
+GEMINI_API_KEY=
+
+# Mistral
+MISTRAL_API_KEY=
+
+# AWS Bedrock
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_REGION_NAME=
+
+# Groq
+GROQ_API_KEY=
+
+# Cohere
+COHERE_API_KEY=
+
+# Anyscale
+ANYSCALE_API_KEY=
+
+# Ollama
+OLLAMA_API_BASE=http://host.docker.internal:11434
+
+# LM Studio
+LM_STUDIO_API_BASE=http://host.docker.internal:1234
+LM_STUDIO_API_KEY=1234
+
+# Huggingface
+HUGGINGFACE_API_BASE=http://host.docker.internal:8080
+HUGGINGFACE_API_KEY=
+
+# Unstructured
+UNSTRUCTURED_API_KEY=
+UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
+UNSTRUCTURED_SERVICE_URL=http://unstructured:7275
+UNSTRUCTURED_NUM_WORKERS=10
+
+# Graphologic
+CLUSTERING_SERVICE_URL=http://graph_clustering:7276
+
+# OAuth Credentials
+GOOGLE_CLIENT_ID=
+GOOGLE_CLIENT_SECRET=
+GOOGLE_REDIRECT_URI=
+
+GITHUB_CLIENT_ID=
+GITHUB_CLIENT_SECRET=
+GITHUB_REDIRECT_URI=
+
+# Email
+MAILERSEND_API_KEY=
+SENDGRID_API_KEY=
+
+# Websearch
+FIRECRAWL_API_KEY=
+SERPER_API_KEY=
+TAVILY_API_KEY=
+
+# Sentry Tracing
+R2R_SENTRY_DSN=
+R2R_SENTRY_ENVIRONMENT=
+R2R_SENTRY_TRACES_SAMPLE_RATE=
+R2R_SENTRY_PROFILES_SAMPLE_RATE=

+ 26 - 0
docker/fluent-bit/fluent-bit.conf

@@ -0,0 +1,26 @@
+[SERVICE]
+    Flush        1
+    Daemon       Off
+    Log_Level    info
+    Parsers_File parsers.conf
+
+[INPUT]
+    Tag    backend
+    Name   forward
+    Listen 0.0.0.0
+    Port   24224
+
+[FILTER]
+    Match    backend
+    Name     parser
+    Key_Name log
+    Parser   json
+
+[OUTPUT]
+    Match            backend
+    Name             http
+    host             victoria-logs
+    port             9428
+    uri              /insert/jsonline?_stream_fields=log&_msg_field=msg,message&_time_field=date
+    format           json_lines
+    json_date_format iso8601

+ 3 - 0
docker/fluent-bit/parsers.conf

@@ -0,0 +1,3 @@
+[PARSER]
+    Name   json
+    Format json

+ 15 - 0
docker/scripts/create-hatchet-db.sh

@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -e
+echo 'Waiting for PostgreSQL to be ready...'
+while ! pg_isready -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user}; do
+  sleep 1
+done
+
+echo 'PostgreSQL is ready, checking if database exists...'
+if ! PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} psql -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -lqt | grep -qw ${HATCHET_POSTGRES_DBNAME:-hatchet}; then
+  echo 'Database does not exist, creating it...'
+  PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} createdb -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -w ${HATCHET_POSTGRES_DBNAME:-hatchet}
+else
+  echo 'Database already exists, skipping creation.'
+fi

+ 52 - 0
docker/scripts/setup-token.sh

@@ -0,0 +1,52 @@
+#!/bin/bash
+
+set -e
+echo 'Starting token creation process...'
+
+# Attempt to create token and capture both stdout and stderr
+TOKEN_OUTPUT=$(/hatchet/hatchet-admin token create --config /hatchet/config --tenant-id 707d0855-80ab-4e1f-a156-f1c4546cbf52 2>&1)
+
+# Extract the token (assuming it's the only part that looks like a JWT)
+TOKEN=$(echo "$TOKEN_OUTPUT" | grep -Eo 'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*')
+
+if [ -z "$TOKEN" ]; then
+    echo 'Error: Failed to extract token. Full command output:' >&2
+    echo "$TOKEN_OUTPUT" >&2
+    exit 1
+fi
+
+echo "$TOKEN" > /tmp/hatchet_api_key
+echo 'Token created and saved to /tmp/hatchet_api_key'
+
+# Copy token to final destination
+echo -n "$TOKEN" > /hatchet_api_key/api_key.txt
+echo 'Token copied to /hatchet_api_key/api_key.txt'
+
+# Verify token was copied correctly
+if [ "$(cat /tmp/hatchet_api_key)" != "$(cat /hatchet_api_key/api_key.txt)" ]; then
+    echo 'Error: Token copy failed, files do not match' >&2
+    echo 'Content of /tmp/hatchet_api_key:'
+    cat /tmp/hatchet_api_key
+    echo 'Content of /hatchet_api_key/api_key.txt:'
+    cat /hatchet_api_key/api_key.txt
+    exit 1
+fi
+
+echo 'Hatchet API key has been saved successfully'
+echo 'Token length:' ${#TOKEN}
+echo 'Token (first 20 chars):' ${TOKEN:0:20}
+echo 'Token structure:' $(echo $TOKEN | awk -F. '{print NF-1}') 'parts'
+
+# Check each part of the token
+for i in 1 2 3; do
+    PART=$(echo $TOKEN | cut -d. -f$i)
+    echo 'Part' $i 'length:' ${#PART}
+    echo 'Part' $i 'base64 check:' $(echo $PART | base64 -d >/dev/null 2>&1 && echo 'Valid' || echo 'Invalid')
+done
+
+# Final validation attempt
+if ! echo $TOKEN | awk -F. '{print $2}' | base64 -d 2>/dev/null | jq . >/dev/null 2>&1; then
+    echo 'Warning: Token payload is not valid JSON when base64 decoded' >&2
+else
+    echo 'Token payload appears to be valid JSON'
+fi

+ 9 - 0
docker/scripts/start-r2r.sh

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Check if HATCHET_CLIENT_TOKEN is set, if not read it from the API key file
+if [ -z "${HATCHET_CLIENT_TOKEN}" ]; then
+  export HATCHET_CLIENT_TOKEN=$(cat /hatchet_api_key/api_key.txt)
+fi
+
+# Start the application
+exec uvicorn core.main.app_entry:app --host ${R2R_HOST} --port ${R2R_PORT}

+ 264 - 0
docker/use-docker-compose.yaml

@@ -0,0 +1,264 @@
+networks:
+  r2r-network:
+    external: true
+    attachable: true
+    labels:
+      - "com.docker.compose.recreate=always"
+    #name: r2r_r2r-network
+
+volumes:
+  hatchet_certs:
+    name: hatchet_certs
+  hatchet_config:
+    name: hatchet_config
+  hatchet_api_key:
+    name: hatchet_api_key
+  postgres_data:
+    name: postgres_data
+  hatchet_rabbitmq_data:
+    name: hatchet_rabbitmq_data
+  hatchet_rabbitmq_conf:
+    name: hatchet_rabbitmq_conf
+  hatchet_postgres_data:
+    name: hatchet_postgres_data
+
+services:
+  postgres:
+    image: pgvector/pgvector:pg16
+    profiles: [postgres]
+    env_file:
+      - ./env/postgres.env
+    volumes:
+      - ./postgres_data:/var/lib/postgresql/data
+    networks:
+      - r2r-network
+    ports:
+      - "5432:5432"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    command: >
+      postgres
+      -c max_connections=1024
+
+  hatchet-postgres:
+    image: postgres:latest
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - ./hatchet_postgres_data:/var/lib/postgresql/data
+    networks:
+      - r2r-network
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U hatchet_user -d hatchet"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+
+  hatchet-rabbitmq:
+    image: "rabbitmq:3-management"
+    hostname: "hatchet-rabbitmq"
+    ports:
+      - "5673:5672"
+      - "15673:15672"
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_rabbitmq_data:/var/lib/rabbitmq
+      - hatchet_rabbitmq_conf:/etc/rabbitmq/rabbitmq.conf
+    networks:
+      - r2r-network
+    healthcheck:
+      test: ["CMD", "rabbitmqctl", "status"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+
+  hatchet-create-db:
+    image: postgres:latest
+    command: sh /scripts/create-hatchet-db.sh
+    volumes:
+      - ./scripts:/scripts
+    networks:
+      - r2r-network
+    env_file:
+      - ./env/hatchet.env
+
+  hatchet-migration:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.53.15
+    env_file:
+      - ./env/hatchet.env
+    networks:
+      - r2r-network
+    depends_on:
+      hatchet-create-db:
+        condition: service_completed_successfully
+
+  hatchet-setup-config:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    networks:
+      - r2r-network
+    depends_on:
+      hatchet-migration:
+        condition: service_completed_successfully
+      hatchet-rabbitmq:
+        condition: service_healthy
+
+  hatchet-engine:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.53.15
+    command: /hatchet/hatchet-engine --config /hatchet/config
+    restart: on-failure
+    depends_on:
+      hatchet-setup-config:
+        condition: service_completed_successfully
+    ports:
+      - "7077:7077"
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    networks:
+      - r2r-network
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://localhost:8733/live"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  hatchet-dashboard:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.53.15
+    command: sh ./entrypoint.sh --config /hatchet/config
+    restart: on-failure
+    depends_on:
+      hatchet-setup-config:
+        condition: service_completed_successfully
+    env_file:
+      - ./env/hatchet.env
+    volumes:
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+    networks:
+      - r2r-network
+    ports:
+      - "7274:80"
+
+  setup-token:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
+    command: sh /scripts/setup-token.sh
+    volumes:
+      - ./scripts:/scripts
+      - hatchet_certs:/hatchet/certs
+      - hatchet_config:/hatchet/config
+      - hatchet_api_key:/hatchet_api_key
+    networks:
+      - r2r-network
+    depends_on:
+      hatchet-setup-config:
+        condition: service_completed_successfully
+
+  unstructured:
+    image: ragtoriches/unst-prod
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7275/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - r2r-network
+
+  graph_clustering:
+    image: ragtoriches/cluster-prod
+    ports:
+      - "7276:7276"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7276/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - r2r-network
+
+  r2r:
+    image: sciphiai/r2r:latest
+    ports:
+      - "7272:7272"
+    env_file:
+      - ./env/r2r-full.env
+    command: sh /scripts/start-r2r.sh
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7272/v3/health"]
+      interval: 6s
+      timeout: 5s
+      retries: 5
+    restart: on-failure
+    volumes:
+      - ./user_configs:/app/user_configs
+      - hatchet_api_key:/hatchet_api_key:ro
+      - ./scripts:/scripts
+    networks:
+      - r2r-network
+    extra_hosts:
+      - host.docker.internal:host-gateway
+    depends_on:
+      setup-token:
+        condition: service_completed_successfully
+      unstructured:
+        condition: service_healthy
+      graph_clustering:
+        condition: service_healthy
+    logging:
+      driver: fluentd
+      options:
+        fluentd-address: host.docker.internal:24224
+        fluentd-sub-second-precision: "true"
+        tag: backend
+
+  r2r-dashboard:
+    image: sciphiai/r2r-dashboard:1.0.2
+    env_file:
+      - ./env/r2r-dashboard.env
+    ports:
+      - "7273:3000"
+    networks:
+      - r2r-network
+
+  fluent-bit:
+    image: fluent/fluent-bit:latest
+    volumes:
+    - ./fluent-bit:/fluent-bit/etc:ro
+    ports:
+    - "24224:24224"
+    depends_on:
+    - victoria-logs
+    networks:
+      - r2r-network
+
+  grafana:
+    image: grafana/grafana:latest
+    ports:
+    - "3001:3000"
+    volumes:
+    - ./.data/grafana:/var/lib/grafana
+    networks:
+      - r2r-network
+
+  victoria-logs:
+    image: victoriametrics/victoria-logs:v1.10.1-victorialogs
+    ports:
+    - "9428:9428"
+    volumes:
+    - ./.data/victoria-logs:/data
+    networks:
+      - r2r-network
+    command: -storageDataPath=/data -retentionPeriod=60d

BIN
docker/user_configs/.r2r.toml.swp


+ 29 - 0
docker/user_configs/README.md

@@ -0,0 +1,29 @@
+# User Configs Directory
+
+## Overview
+This directory is mounted inside the R2R Docker container and is intended for custom configuration files. Any files placed here will be accessible to the application running in the container.
+
+## Usage
+1. Place your custom configuration files in this directory.
+2. Set the `R2R_CONFIG_PATH` in the `r2r.env` or `r2r-full.env` files.
+3. The path format inside the container is: `/app/user_configs/<config>.toml`
+
+## Configuration
+The application uses the environment variable you set to locate your configuration file:
+```
+R2R_CONFIG_PATH=/app/user_configs/<config>.toml
+```
+
+If you want to use a different filename, update the `R2R_CONFIG_PATH` variable in your environment file to point to your custom file, for example:
+```
+R2R_CONFIG_PATH=/app/user_configs/my_custom_config.toml
+```
+
+## Troubleshooting
+If you encounter configuration errors, check:
+1. Your configuration file exists in this directory
+2. The filename matches what's specified in `R2R_CONFIG_PATH`
+3. The file has proper permissions (readable)
+4. The file contains valid TOML syntax
+
+For more detailed configuration information, see the main documentation.

+ 290 - 0
docker/user_configs/all_possible_config.toml

@@ -0,0 +1,290 @@
+################################################################################
+# Global Application Settings (AppConfig)
+################################################################################
+[app]
+# Global project name (optional)
+project_name = ""
+# Maximum number of documents per user (default from code: 100, sample: 10000)
+default_max_documents_per_user = 100
+# Maximum number of chunks per user (default: 10000)
+default_max_chunks_per_user = 10000
+# Maximum number of collections per user (default: 5)
+default_max_collections_per_user = 5
+# Maximum upload size in bytes (default: 2000000 ~2MB)
+default_max_upload_size = 2000000
+# LLM used for user‐facing output (quality)
+quality_llm = ""
+# LLM used for fast internal operations
+fast_llm = ""
+# LLM used for visual inputs
+vlm = ""
+# LLM used for audio transcription
+audio_lm = ""
+# A mapping from file extension to maximum upload size
+  [app.max_upload_size_by_type]
+    txt  = 2000000
+    md   = 2000000
+    tsv  = 2000000
+    csv  = 5000000
+    xml  = 2000000
+    html = 5000000
+    doc  = 10000000
+    docx = 10000000
+    ppt  = 20000000
+    pptx = 20000000
+    xls  = 10000000
+    xlsx = 10000000
+    odt  = 5000000
+    pdf  = 30000000
+    eml  = 5000000
+    msg  = 5000000
+    p7s  = 5000000
+    bmp  = 5000000
+    heic = 5000000
+    jpeg = 5000000
+    jpg  = 5000000
+    png  = 5000000
+    tiff = 5000000
+    epub = 10000000
+    rtf  = 5000000
+    rst  = 5000000
+    org  = 5000000
+
+################################################################################
+# Agent Settings (Custom configuration used by your system)
+################################################################################
+[agent]
+rag_agent_static_prompt = "static_rag_agent"
+rag_agent_dynamic_prompt = "dynamic_rag_agent"
+tools = ["search_file_knowledge", "content"]
+
+################################################################################
+# Authentication Settings (AuthConfig)
+################################################################################
+[auth]
+provider = "r2r"
+# (Optional secret key for signing tokens)
+secret_key = ""
+# Lifetime for access tokens (in minutes)
+access_token_lifetime_in_minutes = 60000
+# Lifetime for refresh tokens (in days)
+refresh_token_lifetime_in_days = 7
+# Whether authentication is required
+require_authentication = false
+# Whether email verification is required
+require_email_verification = false
+# Default admin credentials
+default_admin_email = "admin@example.com"
+default_admin_password = "change_me_immediately"
+
+################################################################################
+# Completion / LLM Generation Settings (CompletionConfig and nested GenerationConfig)
+################################################################################
+[completion]
+provider = "r2r"
+# Maximum number of concurrent requests allowed
+concurrent_request_limit = 256
+
+  [completion.generation_config]
+  # Generation parameters
+  temperature = 0.1
+  top_p = 1.0
+  max_tokens_to_sample = 4096
+  stream = false
+  # Additional generation kwargs (empty table by default)
+  add_generation_kwargs = {}
+
+################################################################################
+# Cryptography Settings (CryptoConfig)
+################################################################################
+[crypto]
+provider = "bcrypt"
+
+################################################################################
+# Database Settings (DatabaseConfig and related nested settings)
+################################################################################
+[database]
+provider = "postgres"
+user = ""
+password = ""
+host = "localhost"
+port = 5432
+db_name = ""
+project_name = ""
+default_collection_name = "Default"
+default_collection_description = "Your default collection."
+collection_summary_system_prompt = "system"
+collection_summary_prompt = "collection_summary"
+enable_fts = false
+batch_size = 1
+kg_store_path = ""
+
+  # PostgreSQL tuning settings
+  [database.postgres_configuration_settings]
+    checkpoint_completion_target = 0.9
+    default_statistics_target = 100
+    effective_io_concurrency = 1
+    effective_cache_size = 524288
+    huge_pages = "try"
+    maintenance_work_mem = 65536
+    max_connections = 256
+    max_parallel_workers_per_gather = 2
+    max_parallel_workers = 8
+    max_parallel_maintenance_workers = 2
+    max_wal_size = 1024
+    max_worker_processes = 8
+    min_wal_size = 80
+    shared_buffers = 16384
+    statement_cache_size = 100
+    random_page_cost = 4.0
+    wal_buffers = 512
+    work_mem = 4096
+
+  # Graph creation settings
+  [database.graph_creation_settings]
+    graph_entity_description_prompt = "graph_entity_description"
+    graph_extraction_prompt = "graph_extraction"
+    entity_types = []
+    relation_types = []
+    automatic_deduplication = true
+
+  # Graph enrichment settings
+  [database.graph_enrichment_settings]
+    graph_communities_prompt = "graph_communities"
+
+  # (Optional) Graph search settings – add fields as needed
+  [database.graph_search_settings]
+    # e.g., search_mode = "default"
+
+  # Rate limiting settings
+  [database.limits]
+    global_per_min = 60
+    route_per_min = 20
+    monthly_limit = 10000
+
+  # Route-specific limits (empty by default)
+  [database.route_limits]
+    # e.g., "/api/search" = { global_per_min = 30, route_per_min = 10, monthly_limit = 5000 }
+
+  # User-specific limits (empty by default)
+  [database.user_limits]
+    # e.g., "user_uuid_here" = { global_per_min = 20, route_per_min = 5, monthly_limit = 2000 }
+
+################################################################################
+# Embedding Settings (EmbeddingConfig)
+################################################################################
+[embedding]
+provider = "litellm"
+base_model = "openai/text-embedding-3-small"
+base_dimension = 512
+# Optional reranking settings (leave empty if not used)
+rerank_model = ""
+rerank_url = ""
+batch_size = 1
+prefixes = {}   # Provide prefix overrides here if needed
+add_title_as_prefix = true
+concurrent_request_limit = 256
+max_retries = 3
+initial_backoff = 1.0
+max_backoff = 64.0
+# Deprecated fields (if still used)
+rerank_dimension = 0
+rerank_transformer_type = ""
+
+  # Vector quantization settings for embeddings
+  [embedding.quantization_settings]
+    quantization_type = "FP32"
+    # (Additional quantization parameters can be added here)
+
+################################################################################
+# Completion Embedding Settings
+# (Usually mirrors the embedding settings; override if needed.)
+################################################################################
+[completion_embedding]
+provider = "litellm"
+base_model = "openai/text-embedding-3-small"
+base_dimension = 512
+batch_size = 1
+add_title_as_prefix = true
+concurrent_request_limit = 256
+
+################################################################################
+# File Storage Settings
+################################################################################
+[file]
+provider = "postgres"
+
+################################################################################
+# Ingestion Settings (IngestionConfig and nested settings)
+################################################################################
+[ingestion]
+provider = "r2r"
+excluded_parsers = ["mp4"]
+chunking_strategy = "recursive"
+chunk_size = 1024
+# Extra field handled by extra_fields – not defined explicitly in IngestionConfig:
+chunk_overlap = 512
+automatic_extraction = true
+# Audio transcription and vision model settings
+audio_transcription_model = ""
+skip_document_summary = false
+document_summary_system_prompt = "system"
+document_summary_task_prompt = "summary"
+document_summary_max_length = 100000
+chunks_for_document_summary = 128
+document_summary_model = ""
+parser_overrides = {}
+
+  # Chunk enrichment settings
+  [ingestion.chunk_enrichment_settings]
+    chunk_enrichment_prompt = "chunk_enrichment"
+    enable_chunk_enrichment = false
+    n_chunks = 2
+
+  # Extra parsers (mapping from file type to parser name)
+  [ingestion.extra_parsers]
+    pdf = "zerox"
+
+################################################################################
+# Logging Settings
+################################################################################
+[logging]
+provider = "r2r"
+log_table = "logs"
+log_info_table = "log_info"
+
+################################################################################
+# Orchestration Settings (OrchestrationConfig)
+################################################################################
+[orchestration]
+provider = "simple"
+max_runs = 2048
+kg_creation_concurrency_limit = 32
+ingestion_concurrency_limit = 16
+kg_concurrency_limit = 4
+
+################################################################################
+# Prompt Settings
+################################################################################
+[prompt]
+provider = "r2r"
+
+################################################################################
+# Email Settings (EmailConfig)
+################################################################################
+[email]
+# Supported providers: "smtp", "console", "sendgrid", etc.
+provider = "console"
+smtp_server = ""
+smtp_port = 587
+smtp_username = ""
+smtp_password = ""
+from_email = ""
+use_tls = true
+sendgrid_api_key = ""
+mailersend_api_key = ""
+verify_email_template_id = ""
+reset_password_template_id = ""
+password_changed_template_id = ""
+frontend_url = ""
+sender_name = ""

+ 21 - 0
docker/user_configs/full.toml

@@ -0,0 +1,21 @@
+[completion]
+provider = "r2r"
+concurrent_request_limit = 256
+
+[ingestion]
+provider = "r2r"
+strategy = "auto"
+chunking_strategy = "by_title"
+new_after_n_chars = 2_048
+max_characters = 4_096
+combine_under_n_chars = 800
+overlap = 400
+
+    [ingestion.extra_parsers]
+    pdf = "zerox"
+
+[orchestration]
+provider = "hatchet"
+kg_creation_concurrency_limit = 32
+ingestion_concurrency_limit = 16
+kg_concurrency_limit = 8

+ 195 - 0
docker/user_configs/r2r.old.toml

@@ -0,0 +1,195 @@
+[app]
+# app settings are globally available like `r2r_config.agent.app`
+# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
+default_max_documents_per_user = 10_000
+default_max_chunks_per_user = 10_000_000
+default_max_collections_per_user = 5_000
+
+# Set the default max upload size to 2 GB for local testing
+default_max_upload_size = 2147483648  # 2 GB for anything not explicitly listed
+
+  [app.max_upload_size_by_type]
+    # Common text-based formats
+    txt   = 2147483648  # 2 GB
+    md    = 2147483648
+    tsv   = 2147483648
+    csv   = 2147483648
+    xml   = 2147483648
+    html  = 2147483648
+
+    # Office docs
+    doc   = 2147483648
+    docx  = 2147483648
+    ppt   = 2147483648
+    pptx  = 2147483648
+    xls   = 2147483648
+    xlsx  = 2147483648
+    odt   = 2147483648
+
+    # PDFs
+    pdf   = 2147483648
+
+    # E-mail
+    eml   = 2147483648
+    msg   = 2147483648
+    p7s   = 2147483648
+
+    # Images
+    bmp   = 2147483648
+    heic  = 2147483648
+    jpeg  = 2147483648
+    jpg   = 2147483648
+    png   = 2147483648
+    tiff  = 2147483648
+
+    # E-books and other formats
+    epub  = 2147483648
+    rtf   = 2147483648
+    rst   = 2147483648
+    org   = 2147483648
+
+[agent]
+system_instruction_name = "rag_agent"
+# tool_names = ["local_search", "web_search"] # uncomment to enable web search
+tool_names = ["local_search"]
+
+  [agent.generation_config]
+  model = "openai/gpt-4o"
+
+[auth]
+provider = "r2r"
+access_token_lifetime_in_minutes = 60000 # set a very high default value, for easier testing
+refresh_token_lifetime_in_days = 7
+require_authentication = false
+require_email_verification = false
+default_admin_email = "xujiawei@cocorobo.cc"
+default_admin_password = "usestudio-1"
+
+[completion]
+provider = "r2r"
+concurrent_request_limit = 256
+fast_llm = "openai/gpt-4o-mini"
+
+  [completion.generation_config]
+  model = "openai/gpt-4o"
+  temperature = 0.1
+  top_p = 1
+  max_tokens_to_sample = 1_024
+  stream = false
+  add_generation_kwargs = { }
+
+[crypto]
+provider = "bcrypt"
+
+[database]
+provider = "postgres"
+default_collection_name = "Default"
+default_collection_description = "Your default collection."
+# collection_summary_system_prompt = 'default_system'
+# collection_summary_task_prompt = 'default_collection_summary'
+
+# KG settings
+batch_size = 256
+
+  [database.graph_creation_settings]
+    clustering_mode = "local"
+    graph_entity_description_prompt = "graphrag_entity_description"
+    entity_types = [] # if empty, all entities are extracted
+    relation_types = [] # if empty, all relations are extracted
+    fragment_merge_count = 1 # number of fragments to merge into a single extraction
+    max_knowledge_relationships = 100
+    max_description_input_length = 65536
+    generation_config = { model = "openai/gpt-4o-mini", max_tokens_to_sample = 4_096 } # and other params, model used for relationship extraction
+    automatic_deduplication = true # enable automatic deduplication of entities
+
+  [database.graph_enrichment_settings]
+    community_reports_prompt = "graphrag_community_reports"
+    max_summary_input_length = 65536
+    generation_config = { model = "openai/gpt-4o-mini", max_tokens_to_sample = 4_096 } # and other params, model used for node description and graph clustering
+    leiden_params = {}
+
+  [database.graph_search_settings]
+    generation_config = { model = "openai/gpt-4o-mini" }
+
+  [database.limits]
+    # Default fallback limits if no route or user-level overrides are found
+    global_per_min = 300
+    monthly_limit = 10000
+
+  [database.route_limits]
+    # Set the `v3/retrieval/search` route to have a maximum of 5 requests per minute
+    "/v3/retrieval/search" = { route_per_min = 120 }
+    "/v3/retrieval/rag" = { route_per_min = 30 }
+
+[embedding]
+provider = "litellm"
+
+# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
+
+# RECOMMENDED - For advanced applications,
+# use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
+#base_model = "openai/text-embedding-3-small"
+#base_dimension = 512
+
+base_model = "openai/text-embedding-3-large"
+base_dimension = 256
+
+
+# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
+
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+quantization_settings = { quantization_type = "FP32" }
+
+[completion_embedding]
+# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
+provider = "litellm"
+base_model = "openai/text-embedding-3-large"
+base_dimension = 256
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+
+[file]
+provider = "postgres"
+
+[ingestion]
+provider = "r2r"
+chunking_strategy = "recursive"
+chunk_size = 800
+chunk_overlap = 400
+excluded_parsers = ["mp4"]
+
+# Ingestion-time document summary parameters
+# skip_document_summary = False
+# document_summary_system_prompt = 'default_system'
+# document_summary_task_prompt = 'default_summary'
+# chunks_for_document_summary = 128
+document_summary_model = "openai/gpt-4o-mini"
+vision_img_model = "openai/gpt-4o"
+vision_pdf_model = "openai/gpt-4o"
+automatic_extraction = true # enable automatic extraction of entities and relations
+
+  [ingestion.chunk_enrichment_settings]
+    enable_chunk_enrichment = false # disabled by default
+    n_chunks = 2 # the number of chunks (both preceding and succeeding) to use in enrichment
+    generation_config = { model = "openai/gpt-4o-mini" }
+
+  [ingestion.extra_parsers]
+    pdf = "zerox"
+
+[logging]
+provider = "r2r"
+log_table = "logs"
+log_info_table = "log_info"
+
+[orchestration]
+provider = "simple"
+
+
+[prompt]
+provider = "r2r"
+
+[email]
+provider = "console_mock"

+ 317 - 0
docker/user_configs/r2r.toml

@@ -0,0 +1,317 @@
+[app]
+# app settings are globally available like `r2r_config.agent.app`
+# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
+default_max_documents_per_user = 1_000_000_000
+default_max_chunks_per_user = 10_000_000_000
+default_max_collections_per_user = 1_000_000_000
+
+# Set the default max upload size to 2 GB for local testing
+default_max_upload_size = 2147483648  # 2 GB for anything not explicitly listed
+
+# LLM used for internal operations, like deriving conversation names
+fast_llm = "openai/gpt-5-nano"
+
+# LLM used for user-facing output, like RAG replies
+quality_llm = "openai/gpt-5-mini"
+
+# LLM used for ingesting visual inputs
+#vlm = "openai/gpt-5"
+vlm = "openai/qwen3-vl-plus"
+# LLM used for transcription
+audio_lm = "openai/whisper-1"
+
+# Reasoning model, used for `research` agent
+reasoning_llm = "openai/o4-mini"
+# Planning model, used for `research` agent
+planning_llm = "anthropic/claude-3-7-sonnet-20250219"
+
+
+
+  [app.max_upload_size_by_type]
+    # Common text-based formats
+    txt   = 2147483648  # 2 GB
+    md    = 2147483648
+    tsv   = 2147483648
+    csv   = 2147483648
+    xml   = 2147483648
+    html  = 2147483648
+
+    # Office docs
+    doc   = 2147483648
+    docx  = 2147483648
+    ppt   = 2147483648
+    pptx  = 2147483648
+    xls   = 2147483648
+    xlsx  = 2147483648
+    odt   = 2147483648
+
+    # PDFs
+    pdf   = 2147483648
+
+    # E-mail
+    eml   = 2147483648
+    msg   = 2147483648
+    p7s   = 2147483648
+
+    # Images
+    bmp   = 2147483648
+    heic  = 2147483648
+    jpeg  = 2147483648
+    jpg   = 2147483648
+    png   = 2147483648
+    tiff  = 2147483648
+
+    # E-books and other formats
+    epub  = 2147483648
+    rtf   = 2147483648
+    rst   = 2147483648
+    org   = 2147483648
+
+[agent]
+rag_agent_static_prompt = "static_rag_agent"
+rag_agent_dynamic_prompt = "dynamic_rag_agent"
+#tools = ["search_file_knowledge", "content"]
+rag_tools = ["search_file_descriptions", "search_file_knowledge", "get_file_content"] # can add  "web_search" | "web_scrape"
+# The following tools are available to the `research` agent
+research_tools = ["rag", "reasoning", "critique", "python_executor"]
+
+
+# tool_names = ["local_search", "web_search"] # uncomment to enable web search
+#tool_names = ["local_search"]
+
+#  [agent.generation_config]
+#  model = "openai/gpt-4o"
+
+[auth]
+provider = "r2r"
+access_token_lifetime_in_minutes = 6000 # set a very high default value, for easier testing
+refresh_token_lifetime_in_days = 7
+require_authentication = false
+require_email_verification = false
+default_admin_email = "xujiawei@cocorobo.cc"
+default_admin_password = "usestudio-1"
+
+[completion]
+provider = "r2r"
+#fast_llm = "openai/gpt-4o-mini"
+concurrent_request_limit = 256000
+#fast_llm = "openai/gpt-4.1-mini"
+request_timeout = 60
+
+  [completion.generation_config]
+  model = "openai/gpt-5-mini"
+  temperature = 0.1
+  top_p = 1.0
+  max_tokens_to_sample = 10_240
+  stream = false
+  add_generation_kwargs = { }
+
+[crypto]
+provider = "bcrypt"
+#provider = "hatchet"
+#kg_creation_concurrency_limit = 32
+#ingestion_concurrency_limit = 32
+#kg_concurrency_limit = 8
+
+[file]
+#provider = "postgres"
+provider = "s3"
+bucket_name = "r2r"
+endpoint_url = "https://r2r.s3.cn-north-1.amazonaws.com.cn"
+region_name = "cn-north-1"
+# SECURITY NOTE(review): live-looking AWS access keys are committed here (and
+# remain in repository history). Rotate these credentials and load them from
+# environment variables (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY) instead of
+# hardcoding them in this file.
+#aws_access_key_id = "AKIATLPEDU37YCOTNRHY"
+#aws_secret_access_key = "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"
+aws_access_key_id = "AKIATLPEDU37ZUJYM7GG"
+aws_secret_access_key = "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"
+
+#provider = "s3"
+#bucket_name = "r2r"
+#endpoint_url = "https://r2r.s3.cn-north-1.amazonaws.com.cn"
+#region_name = "cn-north-1"
+#aws_access_key_id = "AKIATLPEDU37YCOTNRHY"
+#aws_secret_access_key = "kUm8d4tKlvsApIXgJ8obn/RdK0EavYXJ977PIpRz"
+
+[database]
+provider = "postgres"
+default_collection_name = "Default"
+default_collection_description = "Your default collection."
+# collection_summary_system_prompt = 'default_system'
+# collection_summary_task_prompt = 'default_collection_summary'
+
+# KG settings
+batch_size = 64
+
+
+  # PostgreSQL tuning settings
+  [database.postgres_configuration_settings]
+    checkpoint_completion_target = 0.7
+    default_statistics_target = 100
+    effective_io_concurrency = 4
+    effective_cache_size = 5242880
+    huge_pages = "try"
+    maintenance_work_mem = 6553600
+    max_connections = 500
+    max_parallel_workers_per_gather = 16
+    max_parallel_workers = 16
+    max_parallel_maintenance_workers = 16
+    max_wal_size = 102400
+    max_worker_processes = 8
+    min_wal_size = 80
+    shared_buffers = 1638400
+    statement_cache_size = 1000
+    random_page_cost = 4.0
+    wal_buffers = 2560
+    work_mem = 65536
+
+  # Graph creation settings
+  [database.graph_creation_settings]
+    graph_entity_description_prompt = "graph_entity_description"
+    graph_extraction_prompt = "graph_extraction"
+    entity_types = []
+    relation_types = []
+    automatic_deduplication = false
+
+  # Graph enrichment settings
+  [database.graph_enrichment_settings]
+    graph_communities_prompt = "graph_communities"
+
+  # (Optional) Graph search settings – add fields as needed
+  [database.graph_search_settings]
+    # e.g., search_mode = "default"
+
+  # Rate limiting settings
+  [database.limits]
+    global_per_min = 60
+    route_per_min = 20
+    monthly_limit = 10000
+
+  # Route-specific limits (empty by default)
+  [database.route_limits]
+    # e.g., "/api/search" = { global_per_min = 30, route_per_min = 10, monthly_limit = 5000 }
+
+  # User-specific limits (empty by default)
+  [database.user_limits]
+    # e.g., "user_uuid_here" = { global_per_min = 20, route_per_min = 5, monthly_limit = 2000 }
+
+  [database.maintenance]
+    vacuum_schedule = "0 3 * * *"  # Run at 3:00 AM daily
+
+
+[embedding]
+provider = "litellm"
+
+# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
+
+# RECOMMENDED - For advanced applications,
+# use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
+#base_model = "openai/text-embedding-3-small"
+#base_dimension = 512
+
+base_model = "openai/text-embedding-3-large"
+
+#base_model = "doubao-embedding-large"
+#base_model = "/text-embedding-v3"
+
+base_dimension = 256
+
+rerank_model = ""
+rerank_url = ""
+
+# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
+
+batch_size = 256
+prefixes = {}   # Provide prefix overrides here if needed
+add_title_as_prefix = false
+concurrent_request_limit = 2560
+max_retries = 3
+initial_backoff = 1.0
+max_backoff = 64.0
+# Deprecated fields (if still used)
+rerank_dimension = 0
+rerank_transformer_type = ""
+
+
+  # Vector quantization settings for embeddings
+  [embedding.quantization_settings]
+    quantization_type = "FP32"
+    # (Additional quantization parameters can be added here)
+
+[completion_embedding]
+# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
+provider = "litellm"
+base_model = "openai/text-embedding-3-large"
+#base_model = "doubao-embedding-large"
+base_dimension = 256
+batch_size = 128
+concurrent_request_limit = 256
+
+[ingestion]
+#provider = "r2r"
+#provider = "unstructured_local"
+provider = "unstructured_api"
+chunking_strategy = "recursive"
+chunk_size = 800
+chunk_overlap = 400
+excluded_parsers = ["mp4"]
+max_concurrent_vlm_tasks=2000
+vlm_ocr_one_page_per_chunk = true
+
+vlm_batch_size=2000
+
+
+# Ingestion-time document summary parameters
+# skip_document_summary = False
+# document_summary_system_prompt = 'default_system'
+# document_summary_task_prompt = 'default_summary'
+# chunks_for_document_summary = 128
+document_summary_model = "openai/gpt-5-mini"
+#vision_img_model = "openai/gpt-5-mini"
+vision_img_model = "openai/qwen3-vl-plus"
+vision_pdf_model = "openai/gpt-5-mini"
+automatic_extraction = false # enable automatic extraction of entities and relations
+#parser_overrides = {"pdf" = "unstructured"}
+parser_overrides = {}
+  #[ingestion.parser_overrides]
+  #  pdf = "ocr"
+
+  # Chunk enrichment settings
+  [ingestion.chunk_enrichment_settings]
+    chunk_enrichment_prompt = "chunk_enrichment"
+    enable_chunk_enrichment = false
+    n_chunks = 2
+
+#  [ingestion.chunk_enrichment_settings]
+#    enable_chunk_enrichment = false # disabled by default
+#    n_chunks = 2 # the number of chunks (both preceding and succeeding) to use in enrichment
+#    generation_config = { model = "openai/gpt-4.1-mini" }
+
+  [ingestion.extra_parsers]
+    pdf = ["ocr"]
+    #pdf = "zerox"
+
+[ocr]
+provider = "mistral"
+model = "mistral-ocr-latest"
+
+[orchestration]
+provider = "hatchet"
+#provider = "no"
+max_runs = 2048
+kg_creation_concurrency_limit = 32
+ingestion_concurrency_limit = 32
+kg_concurrency_limit = 8
+
+#provider = "no"
+#max_runs = 2048
+#kg_creation_concurrency_limit = 32
+#ingestion_concurrency_limit = 16
+#kg_concurrency_limit = 4
+
+[prompt]
+provider = "r2r"
+
+[email]
+provider = "console_mock"
+
+[scheduler]
+provider = "apscheduler"

+ 291 - 0
docker/user_configs/r2r.toml.bak

@@ -0,0 +1,291 @@
+[app]
+# app settings are globally available like `r2r_config.agent.app`
+# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
+default_max_documents_per_user = 1_000_000
+default_max_chunks_per_user = 10_000_000
+default_max_collections_per_user = 1_000_000
+
+# Set the default max upload size to 2 GB for local testing
+default_max_upload_size = 2147483648  # 2 GB for anything not explicitly listed
+
+# LLM used for internal operations, like deriving conversation names
+fast_llm = "openai/gpt-4.1-mini"
+
+# LLM used for user-facing output, like RAG replies
+quality_llm = "openai/gpt-4.1-mini"
+
+# LLM used for ingesting visual inputs
+vlm = "openai/gpt-4.1-mini"
+
+# LLM used for transcription
+audio_lm = "openai/whisper-1"
+
+# Reasoning model, used for `research` agent
+reasoning_llm = "openai/o4-mini"
+# Planning model, used for `research` agent
+planning_llm = "anthropic/claude-3-7-sonnet-20250219"
+
+
+
+  [app.max_upload_size_by_type]
+    # Common text-based formats
+    txt   = 2147483648  # 2 GB
+    md    = 2147483648
+    tsv   = 2147483648
+    csv   = 2147483648
+    xml   = 2147483648
+    html  = 2147483648
+
+    # Office docs
+    doc   = 2147483648
+    docx  = 2147483648
+    ppt   = 2147483648
+    pptx  = 2147483648
+    xls   = 2147483648
+    xlsx  = 2147483648
+    odt   = 2147483648
+
+    # PDFs
+    pdf   = 2147483648
+
+    # E-mail
+    eml   = 2147483648
+    msg   = 2147483648
+    p7s   = 2147483648
+
+    # Images
+    bmp   = 2147483648
+    heic  = 2147483648
+    jpeg  = 2147483648
+    jpg   = 2147483648
+    png   = 2147483648
+    tiff  = 2147483648
+
+    # E-books and other formats
+    epub  = 2147483648
+    rtf   = 2147483648
+    rst   = 2147483648
+    org   = 2147483648
+
+[agent]
+rag_agent_static_prompt = "static_rag_agent"
+rag_agent_dynamic_prompt = "dynamic_rag_agent"
+#tools = ["search_file_knowledge", "content"]
+rag_tools = ["search_file_descriptions", "search_file_knowledge", "get_file_content"] # can add  "web_search" | "web_scrape"
+# The following tools are available to the `research` agent
+research_tools = ["rag", "reasoning", "critique", "python_executor"]
+
+
+# tool_names = ["local_search", "web_search"] # uncomment to enable web search
+#tool_names = ["local_search"]
+
+#  [agent.generation_config]
+#  model = "openai/gpt-4o"
+
+[auth]
+provider = "r2r"
+access_token_lifetime_in_minutes = 6000 # set a very high default value, for easier testing
+refresh_token_lifetime_in_days = 7
+require_authentication = false
+require_email_verification = false
+default_admin_email = "xujiawei@cocorobo.cc"
+default_admin_password = "usestudio-1"
+
+[completion]
+provider = "r2r"
+#fast_llm = "openai/gpt-4o-mini"
+concurrent_request_limit = 256000
+#fast_llm = "openai/gpt-4.1-mini"
+request_timeout = 60
+
+  [completion.generation_config]
+  model = "openai/gpt-4.1-mini"
+  temperature = 0.1
+  top_p = 1.0
+  max_tokens_to_sample = 10_240
+  stream = false
+  add_generation_kwargs = { }
+
+[crypto]
+provider = "bcrypt"
+
+[database]
+provider = "postgres"
+default_collection_name = "Default"
+default_collection_description = "Your default collection."
+# collection_summary_system_prompt = 'default_system'
+# collection_summary_task_prompt = 'default_collection_summary'
+
+# KG settings
+batch_size = 64
+
+
+  # PostgreSQL tuning settings
+  [database.postgres_configuration_settings]
+    checkpoint_completion_target = 0.7
+    default_statistics_target = 100
+    effective_io_concurrency = 4
+    effective_cache_size = 5242880
+    huge_pages = "try"
+    maintenance_work_mem = 655360
+    max_connections = 2560
+    max_parallel_workers_per_gather = 16
+    max_parallel_workers = 4
+    max_parallel_maintenance_workers = 4
+    max_wal_size = 102400
+    max_worker_processes = 8
+    min_wal_size = 80
+    shared_buffers = 163840
+    statement_cache_size = 1000
+    random_page_cost = 1.1
+    wal_buffers = 2560
+    work_mem = 409600
+
+  # Graph creation settings
+  [database.graph_creation_settings]
+    graph_entity_description_prompt = "graph_entity_description"
+    graph_extraction_prompt = "graph_extraction"
+    entity_types = []
+    relation_types = []
+    automatic_deduplication = false
+
+  # Graph enrichment settings
+  [database.graph_enrichment_settings]
+    graph_communities_prompt = "graph_communities"
+
+  # (Optional) Graph search settings – add fields as needed
+  [database.graph_search_settings]
+    # e.g., search_mode = "default"
+
+  # Rate limiting settings
+  [database.limits]
+    global_per_min = 60
+    route_per_min = 20
+    monthly_limit = 10000
+
+  # Route-specific limits (empty by default)
+  [database.route_limits]
+    # e.g., "/api/search" = { global_per_min = 30, route_per_min = 10, monthly_limit = 5000 }
+
+  # User-specific limits (empty by default)
+  [database.user_limits]
+    # e.g., "user_uuid_here" = { global_per_min = 20, route_per_min = 5, monthly_limit = 2000 }
+
+  [database.maintenance]
+    vacuum_schedule = "0 3 * * *"  # Run at 3:00 AM daily
+
+
+[embedding]
+provider = "litellm"
+
+# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
+
+# RECOMMENDED - For advanced applications,
+# use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
+#base_model = "openai/text-embedding-3-small"
+#base_dimension = 512
+
+base_model = "openai/text-embedding-3-large"
+
+#base_model = "/text-embedding-v3"
+
+base_dimension = 256
+
+rerank_model = ""
+rerank_url = ""
+
+# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
+
+batch_size = 256
+prefixes = {}   # Provide prefix overrides here if needed
+add_title_as_prefix = false
+concurrent_request_limit = 2560
+max_retries = 3
+initial_backoff = 1.0
+max_backoff = 64.0
+# Deprecated fields (if still used)
+rerank_dimension = 0
+rerank_transformer_type = ""
+
+
+  # Vector quantization settings for embeddings
+  [embedding.quantization_settings]
+    quantization_type = "FP32"
+    # (Additional quantization parameters can be added here)
+
+[completion_embedding]
+# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
+provider = "litellm"
+base_model = "openai/text-embedding-3-large"
+#base_model = "dashscope/text-embedding-v3"
+base_dimension = 256
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+
+[file]
+provider = "postgres"
+
+[ingestion]
+provider = "r2r"
+chunking_strategy = "recursive"
+chunk_size = 800
+chunk_overlap = 400
+excluded_parsers = ["mp4"]
+max_concurrent_vlm_tasks=200
+vlm_ocr_one_page_per_chunk = true
+
+vlm_batch_size=200
+
+
+# Ingestion-time document summary parameters
+# skip_document_summary = False
+# document_summary_system_prompt = 'default_system'
+# document_summary_task_prompt = 'default_summary'
+# chunks_for_document_summary = 128
+document_summary_model = "openai/gpt-4.1-mini"
+vision_img_model = "openai/gpt-4.1-mini"
+vision_pdf_model = "openai/gpt-4.1-mini"
+automatic_extraction = false # enable automatic extraction of entities and relations
+parser_overrides = {}
+
+
+  # Chunk enrichment settings
+  [ingestion.chunk_enrichment_settings]
+    chunk_enrichment_prompt = "chunk_enrichment"
+    enable_chunk_enrichment = false
+    n_chunks = 2
+
+#  [ingestion.chunk_enrichment_settings]
+#    enable_chunk_enrichment = false # disabled by default
+#    n_chunks = 2 # the number of chunks (both preceeding and succeeding) to use in enrichment
+#    generation_config = { model = "openai/gpt-4.1-mini" }
+
+  [ingestion.extra_parsers]
+    pdf = ["ocr", "zerox"]
+    #pdf = "ocr"
+
+[logging]
+provider = "r2r"
+log_table = "logs"
+log_info_table = "log_info"
+
+[ocr]
+provider = "mistral"
+model = "mistral-ocr-latest"
+
+[orchestration]
+provider = "no"
+#max_runs = 2048
+#kg_creation_concurrency_limit = 32
+#ingestion_concurrency_limit = 16
+#kg_concurrency_limit = 4
+
+[prompt]
+provider = "r2r"
+
+[email]
+provider = "console_mock"
+
+[scheduler]
+provider = "apscheduler"

+ 61 - 0
docker/user_tools/README.md

@@ -0,0 +1,61 @@
+# User-Defined Tools Directory
+
+## Overview
+This directory is mounted inside the R2R Docker container and is intended for custom tool files. Any files placed here will be accessible to the application running in the container.
+
+## Usage
+1. Place your custom tool definitions in this directory. Utilize the template structure demonstrated here.
+2. Add any additional dependencies that you may need to the user_requirements.txt file in this directory.
+3. Include the tool in your agent configuration.
+
+## Creating a tool
+```python
+from core.base.agent.tools.base import Tool
+
+
+class ToolNameTool(Tool):
+    """
+    A user defined tool.
+    """
+
+    def __init__(self):
+        super().__init__(
+            name="tool_name",
+            description="A natural language tool description that is shown to the agent.",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "input_parameter": {
+                        "type": "string",
+                        "description": "Define any input parameters by their name and type",
+                    },
+                },
+                "required": ["input_parameter"],
+            },
+            results_function=self.execute,
+            llm_format_function=None,
+        )
+
+    async def execute(self, input_parameter: str, *args, **kwargs):
+        """
+        Implementation of the tool.
+        """
+
+        # Any custom tool logic can go here
+
+        output_response = some_method(input_parameter)
+
+        result = AggregateSearchResult(
+            generic_tool_result=[output_response],
+        )
+
+        # Add to results collector if a context is provided via kwargs
+        context = kwargs.get("context")
+        if context and hasattr(context, "search_results_collector"):
+            context.search_results_collector.add_aggregate_result(result)
+
+        return result
+```
+
+## Troubleshooting
+
+For more detailed configuration information, see the main documentation.

+ 0 - 0
docker/user_tools/user_requirements.txt