@@ -1,4 +1,3 @@
import re
from collections.abc import Generator
from typing import Optional, cast
@@ -104,17 +103,16 @@ class ArkClientV3:
if message_content.type == PromptMessageContentType.TEXT:
    content.append(
        ChatCompletionContentPartTextParam(
            text=message_content.text,
            text=message_content.data,
            type="text",
        )
    )
elif message_content.type == PromptMessageContentType.IMAGE:
    message_content = cast(ImagePromptMessageContent, message_content)
    image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", message_content.data)
    content.append(
        ChatCompletionContentPartImageParam(
            image_url=ImageURL(
                url=image_data,
                url=message_content.data,
                detail=message_content.detail.value,
            ),
            type="image_url",
@@ -132,6 +132,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages]
for message in messages_dict:
    for key, value in message.items():
        # Ignore tokens for image type
        if isinstance(value, list):
            text = ""
            for item in value:
                if isinstance(item, dict) and item["type"] == "text":
                    text += item["text"]
            value = text
        num_tokens += self._get_num_tokens_by_gpt2(str(key))
        num_tokens += self._get_num_tokens_by_gpt2(str(value))
@@ -16,6 +16,14 @@ class ModelConfig(BaseModel):
configs: dict[str, ModelConfig] = {
    "Doubao-vision-pro-32k": ModelConfig(
        properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
        features=[ModelFeature.VISION],
    ),
    "Doubao-vision-lite-32k": ModelConfig(
        properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
        features=[ModelFeature.VISION],
    ),
    "Doubao-pro-4k": ModelConfig(
        properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
        features=[ModelFeature.TOOL_CALL],
@@ -118,6 +118,18 @@ model_credential_schema:
type: select
required: true
options:
  - label:
      en_US: Doubao-vision-pro-32k
    value: Doubao-vision-pro-32k
    show_on:
      - variable: __model_type
        value: llm
  - label:
      en_US: Doubao-vision-lite-32k
    value: Doubao-vision-lite-32k
    show_on:
      - variable: __model_type
        value: llm
  - label:
      en_US: Doubao-pro-4k
    value: Doubao-pro-4k