浏览代码

fix: first agent latency (#2334)

tags/0.5.3
Yeuoly 1年前
父节点
当前提交
3b357f51a6
没有帐户链接到提交者的电子邮件
共有 1 个文件被更改,包括 6 次插入和 1 次删除
  1. 6
    1
      api/core/features/assistant_fc_runner.py

+ 6
- 1
api/core/features/assistant_fc_runner.py 查看文件

@@ -97,7 +97,6 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
tool_input='',
messages_ids=message_file_ids
)
self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)

# recale llm max tokens
self.recale_llm_max_tokens(self.model_config, prompt_messages)
@@ -124,7 +123,11 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
current_llm_usage = None

if self.stream_tool_call:
is_first_chunk = True
for chunk in chunks:
if is_first_chunk:
self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
is_first_chunk = False
# check if there is any tool call
if self.check_tool_calls(chunk):
function_call_state = True
@@ -183,6 +186,8 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
if not result.message.content:
result.message.content = ''

self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
yield LLMResultChunk(
model=model_instance.model,
prompt_messages=result.prompt_messages,

正在加载...
取消
保存