@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
             if chunk_json["done"]:
                 # calculate num tokens
-                if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
-                    # transform usage
+                if "prompt_eval_count" in chunk_json:
                     prompt_tokens = chunk_json["prompt_eval_count"]
-                    completion_tokens = chunk_json["eval_count"]
                 else:
-                    # calculate num tokens
-                    prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
-                    completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+                    prompt_message_content = prompt_messages[0].content
+                    if isinstance(prompt_message_content, str):
+                        prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+                    else:
+                        content_text = ""
+                        for message_content in prompt_message_content:
+                            if message_content.type == PromptMessageContentType.TEXT:
+                                message_content = cast(TextPromptMessageContent, message_content)
+                                content_text += message_content.data
+                        prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
+                completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))

                 # transform usage
                 usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
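The behavioral change is easiest to see in isolation: the old branch required both `prompt_eval_count` and `eval_count` in the final chunk, and its fallback assumed `prompt_messages[0].content` was a `str`, so a multimodal (vision) prompt, whose content is a list of parts, would feed a list into the GPT-2 tokenizer. Below is a minimal, self-contained sketch of the new counting logic under simplified assumptions: `PromptMessageContentType` and the two content classes are trimmed-down stand-ins for Dify's real message entities, and the hypothetical `count_tokens` helper replaces `_get_num_tokens_by_gpt2` with a crude length heuristic, so this is illustrative only, not the runtime code.

```python
from dataclasses import dataclass
from enum import Enum
from typing import Union


# Simplified stand-ins for Dify's prompt-message content types (assumption:
# the real classes live in the model runtime and carry more fields).
class PromptMessageContentType(Enum):
    TEXT = "text"
    IMAGE = "image"


@dataclass
class TextPromptMessageContent:
    data: str
    type: PromptMessageContentType = PromptMessageContentType.TEXT


@dataclass
class ImagePromptMessageContent:
    data: str  # e.g. a base64 data URL
    type: PromptMessageContentType = PromptMessageContentType.IMAGE


def count_tokens(text: str) -> int:
    # Hypothetical stand-in for _get_num_tokens_by_gpt2: a rough
    # 4-characters-per-token estimate, good enough for the demo.
    return max(1, len(text) // 4)


def estimate_usage(chunk_json: dict,
                   content: Union[str, list],
                   full_text: str) -> tuple[int, int]:
    """Mirror the patched logic: prefer Ollama's own counters from the
    final chunk, estimate locally only when they are missing."""
    if "prompt_eval_count" in chunk_json:
        prompt_tokens = chunk_json["prompt_eval_count"]
    elif isinstance(content, str):
        # Plain text prompt: count it directly.
        prompt_tokens = count_tokens(content)
    else:
        # Multimodal prompt: only TEXT parts contribute to the estimate.
        text = "".join(part.data for part in content
                       if part.type == PromptMessageContentType.TEXT)
        prompt_tokens = count_tokens(text)

    # eval_count may be absent too; fall back to the accumulated response.
    completion_tokens = chunk_json.get("eval_count", count_tokens(full_text))
    return prompt_tokens, completion_tokens


# A vision-style prompt where the final chunk carried no counters: the old
# fallback would have handed this whole list to the tokenizer.
content = [
    ImagePromptMessageContent(data="data:image/png;base64,..."),
    TextPromptMessageContent(data="Describe this image."),
]
print(estimate_usage({"done": True}, content, "A cat sitting on a mat."))
# -> (5, 5)
```

Note the `chunk_json.get("eval_count", ...)` form: it decouples the two counters, so a final chunk that reports only one of them no longer forces both values to be re-estimated.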