| @@ -58,6 +58,7 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation): | |||
| LLMNode.deduct_llm_quota( | |||
| tenant_id=tenant.id, model_instance=model_instance, usage=chunk.delta.usage | |||
| ) | |||
| chunk.prompt_messages = [] | |||
| yield chunk | |||
| return handle() | |||
| @@ -68,7 +69,7 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation): | |||
| def handle_non_streaming(response: LLMResult) -> Generator[LLMResultChunk, None, None]: | |||
| yield LLMResultChunk( | |||
| model=response.model, | |||
| prompt_messages=response.prompt_messages, | |||
| prompt_messages=[], | |||
| system_fingerprint=response.system_fingerprint, | |||
| delta=LLMResultChunkDelta( | |||
| index=0, | |||