```diff
                         LLMNode.deduct_llm_quota(
                             tenant_id=tenant.id, model_instance=model_instance, usage=chunk.delta.usage
                         )
+                    chunk.prompt_messages = []
                     yield chunk

             return handle()

         def handle_non_streaming(response: LLMResult) -> Generator[LLMResultChunk, None, None]:
             yield LLMResultChunk(
                 model=response.model,
-                prompt_messages=response.prompt_messages,
+                prompt_messages=[],
                 system_fingerprint=response.system_fingerprint,
                 delta=LLMResultChunkDelta(
                     index=0,
```