ollama.toml

[app]
# LLM used for internal operations, like deriving conversation names
fast_llm = "ollama/llama3.1" # NOTE: for best results, use the `openai` provider with `api_base = "http://localhost:11434/v1"`; otherwise the `ollama` provider via `litellm` is acceptable
# LLM used for user-facing output, like RAG replies
quality_llm = "ollama/llama3.1"

# LLM used for ingesting visual inputs
vlm = "ollama/llama3.1" # TODO - Replace with viable candidate

# LLM used for transcription
audio_lm = "ollama/llama3.1" # TODO - Replace with viable candidate

# Reasoning model, used for `research` agent
reasoning_llm = "ollama/llama3.1"

# Planning model, used for `research` agent
planning_llm = "ollama/llama3.1"
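
# Commented-out sketch of the `openai`-provider setup recommended in the note
# on `fast_llm` above; litellm routes `openai/<model>` strings through any
# OpenAI-compatible API, here Ollama's (the exact model strings below are
# assumptions and may vary by version):
# fast_llm = "openai/llama3.1"
# quality_llm = "openai/llama3.1"
# api_base stays as set under [completion.generation_config] below.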

[embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2

[completion_embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2
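
# The Ollama models referenced above must be available locally before use,
# e.g. `ollama pull llama3.1` and `ollama pull mxbai-embed-large`.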

[agent]
tools = ["search_file_knowledge"]
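# `search_file_knowledge` lets the agent search ingested documents; additional
# tool names can be listed here (assumption: the available tool set depends on
# the server version).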

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
api_base = "http://localhost:11434/v1"
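
# `api_base` points at Ollama's OpenAI-compatible endpoint on its default port
# (11434); adjust the host or port if your Ollama server listens elsewhere.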