full_ollama.toml

[app]
# LLM used for internal operations, like deriving conversation names
fast_llm = "ollama/llama3.1"
# LLM used for user-facing output, like RAG replies
quality_llm = "ollama/llama3.1"
# LLM used for ingesting visual inputs
vlm = "ollama/llama3.1" # TODO - Replace with viable candidate
# LLM used for transcription
audio_lm = "ollama/llama3.1" # TODO - Replace with viable candidate
# Reasoning model, used for `research` agent
reasoning_llm = "ollama/llama3.1"
# Planning model, used for `research` agent
planning_llm = "ollama/llama3.1"

[embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2

[completion_embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2

[agent]
tools = ["search_file_knowledge"]

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
api_base = "http://host.docker.internal:11434"

[ingestion]
provider = "unstructured_local"
strategy = "auto"
chunking_strategy = "by_title"
new_after_n_chars = 512
max_characters = 1_024
combine_under_n_chars = 128
overlap = 20
chunks_for_document_summary = 16
document_summary_model = "ollama/llama3.1"
automatic_extraction = false

[orchestration]
provider = "hatchet"