```diff
                         LLMNode.deduct_llm_quota(
                             tenant_id=tenant.id, model_instance=model_instance, usage=chunk.delta.usage
                         )
+                    chunk.prompt_messages = []
                     yield chunk

             return handle()

         def handle_non_streaming(response: LLMResult) -> Generator[LLMResultChunk, None, None]:
             yield LLMResultChunk(
                 model=response.model,
-                prompt_messages=response.prompt_messages,
+                prompt_messages=[],
                 system_fingerprint=response.system_fingerprint,
                 delta=LLMResultChunkDelta(
                     index=0,
```