# Model and provider settings for running against Ollama-served models.
# Every model slot below currently points at the same `ollama/llama3.1` model.

[app]
# NOTE: for best results it is recommended to use `openai` with
# `api_base = "http://localhost:11434/v1"`; otherwise `ollama` with `litellm`
# is acceptable.
# LLM used for internal operations, like deriving conversation names
fast_llm = "ollama/llama3.1"
# LLM used for user-facing output, like RAG replies
quality_llm = "ollama/llama3.1"
# LLM used for ingesting visual inputs
# TODO: replace with a viable candidate
vlm = "ollama/llama3.1"
# LLM used for transcription
# TODO: replace with a viable candidate
audio_lm = "ollama/llama3.1"
# Reasoning model, used for the `research` agent
reasoning_llm = "ollama/llama3.1"
# Planning model, used for the `research` agent
planning_llm = "ollama/llama3.1"

[embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
# Presumably must match the output dimension of `base_model`; verify before
# swapping in a different embedding model.
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2

# Same provider, model and limits as [embedding]; keep the two tables in sync
# unless they are meant to diverge.
[completion_embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2

[agent]
tools = ["search_file_knowledge"]

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
# NOTE(review): the note in [app] pairs this `/v1` URL with `openai`, while
# this file uses `ollama/...` model names through the `litellm` provider.
# Confirm that the `/v1` path is what that combination expects.
api_base = "http://localhost:11434/v1"