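# Configuration for a fully local deployment backed by Ollama.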
[app]
# LLM used for internal operations, like deriving conversation names
fast_llm = "ollama/llama3.1"
# LLM used for user-facing output, like RAG replies
quality_llm = "ollama/llama3.1"
# LLM used for ingesting visual inputs
vlm = "ollama/llama3.1" # TODO - Replace with viable candidate
# LLM used for transcription
audio_lm = "ollama/llama3.1" # TODO - Replace with viable candidate
# Reasoning model used by the `research` agent
reasoning_llm = "ollama/llama3.1"
# Planning model used by the `research` agent
planning_llm = "ollama/llama3.1"
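
# Embeddings computed at ingestion time; base_dimension must match the
# model's native output size (1,024 dimensions for mxbai-embed-large).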
[embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2
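
# Embeddings computed at query/completion time; kept identical to
# [embedding] so query and document vectors share the same vector space.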
[completion_embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
concurrent_request_limit = 2
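
# Tools exposed to the built-in agent; search_file_knowledge searches
# over previously ingested documents.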
[agent]
tools = ["search_file_knowledge"]
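
# Completions are routed through LiteLLM, which maps "ollama/..." model
# names to the local Ollama server configured under generation_config.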
[completion]
provider = "litellm"
concurrent_request_limit = 1
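
# Default sampling parameters; a low temperature keeps RAG answers close
# to the retrieved context. host.docker.internal resolves to the Docker
# host, where Ollama listens on its default port 11434.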
[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
api_base = "http://host.docker.internal:11434"
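
# Ingestion is handled by a local Unstructured instance. Sizes are in
# characters: chunks split at section titles, start a new chunk once
# they pass 512 characters, are hard-capped at 1,024, and sections under
# 128 characters are merged into their neighbors.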
[ingestion]
provider = "unstructured_local"
strategy = "auto"
chunking_strategy = "by_title"
new_after_n_chars = 512
max_characters = 1_024
combine_under_n_chars = 128
overlap = 20
chunks_for_document_summary = 16
document_summary_model = "ollama/llama3.1"
automatic_extraction = false
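
# Hatchet runs ingestion and other long-running jobs as durable
# background workflows.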
[orchestration]
provider = "hatchet"