lm_studio.toml

[app]
# LLM used for internal operations, like deriving conversation names
fast_llm = "lm_studio/llama-3.2-3b-instruct"
# LLM used for user-facing output, like RAG replies
quality_llm = "lm_studio/llama-3.2-3b-instruct"
# LLM used for ingesting visual inputs
vlm = "lm_studio/llama3.2-vision" # TODO - Replace with viable candidate
# LLM used for transcription
audio_lm = "lm_studio/llama-3.2-3b-instruct" # TODO - Replace with viable candidate

[embedding]
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5"
base_dimension = nan
batch_size = 128
concurrent_request_limit = 2

[completion_embedding]
# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5"
base_dimension = nan
batch_size = 128
concurrent_request_limit = 2

[agent]
tools = ["search_file_knowledge"]

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
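
The model strings above use LiteLLM's "lm_studio/<model-name>" routing convention, so the same identifiers can be exercised directly against a local LM Studio server before the config is put to use. Below is a minimal sketch, assuming Python 3.11+ (for the built-in tomllib) and an LM Studio server listening on localhost:1234; the LM_STUDIO_API_BASE / LM_STUDIO_API_KEY values and the file path are assumptions to adjust for your setup. It parses the file and sends a one-off completion to the configured quality_llm with the generation settings from [completion.generation_config].

    # check_lm_studio_config.py — a minimal sketch, not part of the config itself.
    # Assumes a local LM Studio server on port 1234 and Python 3.11+ (tomllib).
    import os
    import tomllib

    import litellm

    # Assumption: LM Studio's OpenAI-compatible endpoint; adjust host/port as needed.
    os.environ.setdefault("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
    os.environ.setdefault("LM_STUDIO_API_KEY", "lm-studio")  # placeholder key for a local server

    # Load the TOML shown above (tomllib requires a binary file handle).
    with open("lm_studio.toml", "rb") as f:
        config = tomllib.load(f)

    quality_llm = config["app"]["quality_llm"]           # "lm_studio/llama-3.2-3b-instruct"
    gen_cfg = config["completion"]["generation_config"]  # temperature, top_p, max_tokens_to_sample

    # One-off smoke test against the model the config routes user-facing replies to.
    response = litellm.completion(
        model=quality_llm,
        messages=[{"role": "user", "content": "Reply with a single word: ready?"}],
        temperature=gen_cfg["temperature"],
        top_p=gen_cfg["top_p"],
        max_tokens=gen_cfg["max_tokens_to_sample"],
    )
    print(response.choices[0].message.content)

If the call returns a reply, the model identifiers and generation settings in the config resolve correctly; the embedding models can be checked the same way with litellm.embedding and the base_model values from [embedding].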