@@ -97,7 +97,6 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                 tool_input='',
                 messages_ids=message_file_ids
             )
-            self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)

             # recale llm max tokens
             self.recale_llm_max_tokens(self.model_config, prompt_messages)
@@ -124,7 +123,11 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
             current_llm_usage = None

             if self.stream_tool_call:
+                is_first_chunk = True
                 for chunk in chunks:
+                    if is_first_chunk:
+                        self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+                        is_first_chunk = False
                     # check if there is any tool call
                     if self.check_tool_calls(chunk):
                         function_call_state = True
@@ -183,6 +186,8 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                 if not result.message.content:
                     result.message.content = ''

+                self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+
                 yield LLMResultChunk(
                     model=model_instance.model,
                     prompt_messages=result.prompt_messages,