```toml
[app]
# LLM used for internal operations, like deriving conversation names
fast_llm = "lm_studio/llama-3.2-3b-instruct"

# LLM used for user-facing output, like RAG replies
quality_llm = "lm_studio/llama-3.2-3b-instruct"

# LLM used for ingesting visual inputs
vlm = "lm_studio/llama3.2-vision" # TODO - Replace with viable candidate

# LLM used for transcription
audio_lm = "lm_studio/llama-3.2-3b-instruct" # TODO - Replace with viable candidate

[embedding]
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5"
base_dimension = nan
batch_size = 128
concurrent_request_limit = 2

[completion_embedding]
# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5"
base_dimension = nan
batch_size = 128
concurrent_request_limit = 2

[agent]
tools = ["search_file_knowledge"]

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
```
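Because `provider = "litellm"` is set, the model strings above are LiteLLM route names, and the `lm_studio/` prefix targets a locally running LM Studio server. The snippet below is a minimal standalone sketch (not part of the configuration above) that calls the same model the `quality_llm` entry points at, directly through LiteLLM; the `api_base` value assumes LM Studio's default local endpoint of `http://localhost:1234/v1`, and the API key is a placeholder since LM Studio does not require one.

```python
# Minimal sketch: exercise the quality_llm route from the config directly
# via LiteLLM. Assumes LM Studio is running locally with its
# OpenAI-compatible server enabled at the default address.
from litellm import completion

response = completion(
    model="lm_studio/llama-3.2-3b-instruct",  # matches quality_llm above
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    api_base="http://localhost:1234/v1",      # assumed default LM Studio endpoint
    api_key="lm-studio",                      # placeholder; LM Studio ignores the key
    temperature=0.1,
    max_tokens=1024,                          # mirrors max_tokens_to_sample
)
print(response.choices[0].message.content)
```

If this call returns a reply, the same model names should resolve when the server loads this configuration file.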