@@ -239,16 +239,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
 
         def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Generator:
             for chunk in chunks:
-                if not chunk.choices:
-                    continue
-                choice = chunk.choices[0]
-
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
                     delta=LLMResultChunkDelta(
-                        index=choice.index,
-                        message=AssistantPromptMessage(content=choice.delta.content, tool_calls=[]),
+                        index=0,
+                        message=AssistantPromptMessage(
+                            content=chunk.choices[0].delta.content if chunk.choices else "", tool_calls=[]
+                        ),
                         usage=self._calc_response_usage(
                             model=model,
                             credentials=credentials,
@@ -257,7 +255,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
                         )
                         if chunk.usage
                         else None,
-                        finish_reason=choice.finish_reason,
+                        finish_reason=chunk.choices[0].finish_reason if chunk.choices else None,
                     ),
                 )
 
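For reference, here is a minimal standalone sketch of the inline guards this diff introduces, using hypothetical stand-in dataclasses rather than the real Dify / Volcengine Ark SDK types. After the change, a streamed chunk whose `choices` list is empty is no longer skipped: it still produces a result chunk with empty delta content, `index=0`, and no finish reason, which appears to be what allows a chunk that carries only `usage` to flow through.

```python
from dataclasses import dataclass, field
from typing import Optional


# Hypothetical stand-in types for illustration only; the real ChatCompletionChunk
# comes from the Volcengine Ark SDK and carries more fields than shown here.
@dataclass
class Delta:
    content: str = ""


@dataclass
class Choice:
    index: int = 0
    delta: Delta = field(default_factory=Delta)
    finish_reason: Optional[str] = None


@dataclass
class ChatCompletionChunk:
    choices: list = field(default_factory=list)


def delta_fields(chunk: ChatCompletionChunk) -> tuple:
    """Mirror the inline guards from the new code: fall back to empty content
    and no finish reason when a streamed chunk has an empty choices list."""
    content = chunk.choices[0].delta.content if chunk.choices else ""
    finish_reason = chunk.choices[0].finish_reason if chunk.choices else None
    return content, finish_reason


# A chunk with no choices (e.g. one that only carries usage) is no longer skipped:
print(delta_fields(ChatCompletionChunk()))  # ('', None)
print(delta_fields(ChatCompletionChunk(
    choices=[Choice(delta=Delta("Hello"), finish_reason="stop")])))  # ('Hello', 'stop')
```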