full_lm_studio.toml

[app]
# LLM used for internal operations, like deriving conversation names
fast_llm = "lm_studio/llama-3.2-3b-instruct"

# LLM used for user-facing output, like RAG replies
quality_llm = "lm_studio/llama-3.2-3b-instruct"

# LLM used for ingesting visual inputs
vlm = "lm_studio/llama3.2-vision" # TODO - Replace with viable candidate

# LLM used for transcription
audio_lm = "lm_studio/llama-3.2-3b-instruct" # TODO - Replace with viable candidate

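# Embedding model used when indexing and searching ingested documents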
[embedding]
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5"
base_dimension = nan
batch_size = 128
concurrent_request_limit = 2

[completion_embedding]
# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5"
base_dimension = nan
batch_size = 128
concurrent_request_limit = 2

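# Tools exposed to the RAG agent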
[agent]
tools = ["search_file_knowledge"]

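# Chat completion requests are routed through LiteLLM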
[completion]
provider = "litellm"
concurrent_request_limit = 1

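# Default generation parameters for completion requests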
[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false

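# Document parsing and chunking, run locally with the unstructured provider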
[ingestion]
provider = "unstructured_local"
strategy = "auto"
chunking_strategy = "by_title"
new_after_n_chars = 512
max_characters = 1_024
combine_under_n_chars = 128
overlap = 20
chunks_for_document_summary = 16
document_summary_model = "lm_studio/llama-3.2-3b-instruct"
automatic_extraction = false

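# Workflow orchestration backend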
[orchestration]
provider = "hatchet"
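
The model strings above use LiteLLM's `lm_studio/` provider prefix, so the server must be able to reach a running LM Studio instance (its OpenAI-compatible API listens on port 1234 by default). Before starting a server with this config, a quick way to confirm connectivity is to call the same model directly through LiteLLM. The sketch below is illustrative only: the `LM_STUDIO_API_BASE`/`LM_STUDIO_API_KEY` environment variables and the placeholder key are assumptions about LiteLLM's LM Studio integration, not values taken from this file.

```python
# Minimal connectivity check for the models referenced in this config.
# Assumptions: LiteLLM's lm_studio provider reads LM_STUDIO_API_BASE /
# LM_STUDIO_API_KEY, and LM Studio's OpenAI-compatible server is running
# on its default port (1234). Adjust the base URL if yours differs.
import os

import litellm

os.environ.setdefault("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
os.environ.setdefault("LM_STUDIO_API_KEY", "lm-studio")  # LM Studio ignores the key

response = litellm.completion(
    model="lm_studio/llama-3.2-3b-instruct",  # same string as fast_llm above
    messages=[{"role": "user", "content": "Reply with one word: ready"}],
    temperature=0.1,
    max_tokens=32,
)
print(response.choices[0].message.content)
```

If this prints a reply, the same identifier should resolve for `fast_llm`, `quality_llm`, and `document_summary_model` above; the embedding model can be checked the same way with `litellm.embedding`.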