@@ -29,7 +29,7 @@ class LLMBackend:
         temperature=None,
         top_p=None,
         response_format=None,
-        parallel_tool_calls=False,
+        parallel_tool_calls=True,
         audio=None,
         modalities=None,
     ) -> ChatCompletion | Stream[ChatCompletionChunk]:
@@ -40,7 +40,8 @@ class LLMBackend:
             "messages": messages,
             "model": model,
             "stream": stream,
-            "presence_penalty": None,
+            "presence_penalty": 0,
+            "frequency_penalty": 0
             # "parallel_tool_calls": parallel_tool_calls,
         }
         if extra_body:
@@ -84,6 +85,7 @@ class LLMBackend:
             if "content" not in message:
                 message["content"] = ""
         chat_params["timeout"] = 300
+
         logging.info("chat_params: %s", chat_params)
         response = self.client.chat.completions.create(**chat_params)
         logging.info("chat_response: %s", response)
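
For context, below is a minimal, self-contained sketch of how the patched request parameters fit together when calling an OpenAI-compatible client. The send_chat helper, the default model name, and the client construction are illustrative assumptions rather than code from this repository; only the parameter handling mirrors the diff above (explicit zero penalties instead of None, a 300-second per-request timeout, and parallel_tool_calls kept out of the request body).

    # Illustrative sketch only -- not the repository's LLMBackend implementation.
    import logging
    from openai import OpenAI

    logging.basicConfig(level=logging.INFO)

    client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

    def send_chat(messages, model="gpt-4o-mini", stream=False):
        # Hypothetical helper; the model name is a placeholder.
        chat_params = {
            "messages": messages,
            "model": model,
            "stream": stream,
            # Explicit zeros, matching the patched defaults above.
            "presence_penalty": 0,
            "frequency_penalty": 0,
            # parallel_tool_calls is only meaningful alongside "tools",
            # which is presumably why the patch leaves it commented out here.
            "timeout": 300,
        }
        logging.info("chat_params: %s", chat_params)
        response = client.chat.completions.create(**chat_params)
        logging.info("chat_response: %s", response)
        return response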