@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
         :param prompt_messages: prompt messages
         :return: llm response chunk generator result
         """
         completion = ""
         for index, content in enumerate(client.subscribe()):
             if isinstance(content, dict):
                 delta = content["data"]
             else:
                 delta = content

             completion += delta
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta or "",
             )

+            temp_assistant_prompt_message = AssistantPromptMessage(
+                content=completion,
+            )
             prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+            completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])

             # transform usage
             usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
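
For context, a minimal runnable sketch (not part of the diff) of the behaviour this hunk changes: the old code measured `completion_tokens` against only the latest `delta`, so the reported usage collapsed to the size of the final chunk, whereas measuring the accumulated `completion` (via the new `temp_assistant_prompt_message`) reflects the whole response generated so far. The `count_tokens` helper below is a hypothetical whitespace stand-in for `get_num_tokens`.

```python
def count_tokens(text: str) -> int:
    # Hypothetical stand-in for SparkLargeLanguageModel.get_num_tokens();
    # the real method uses the model's tokenizer, not whitespace splitting.
    return len(text.split())

deltas = ["Hello", " there,", " how are you?"]

completion = ""
for delta in deltas:
    completion += delta
    per_delta = count_tokens(delta)        # old behaviour: counts the last chunk only
    cumulative = count_tokens(completion)  # fixed behaviour: counts the whole completion

print(per_delta, cumulative)  # 3 5 -- the per-delta count drops the earlier chunks
```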