Co-authored-by: duyalei <>
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.00025'
  output: '0.00125'
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.015'
  output: '0.075'
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.003'
  output: '0.015'
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.003'
  output: '0.015'
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.008'
  output: '0.024'
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.008'
  output: '0.024'
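These six hunks add the same `response_format` rule to six different model YAML files; only each model's pricing context differs. For orientation, a sketch of how the added lines sit inside one such file (the enclosing `top_k` rule is inferred from the help text shown above, and the `unit`/`currency` pricing fields are assumed from Dify's model-config schema rather than visible in this diff):

```yaml
parameter_rules:
  - name: top_k
    type: int
    required: false
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format            # added by this PR
    use_template: response_format    # reuses the shared response_format parameter template
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.000001'                   # assumed field: USD per token
  currency: USD                      # assumed field
```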
import logging
from collections.abc import Generator
from typing import Optional, Union
from PIL.Image import Image
# local import
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageTool,
    SystemPromptMessage,
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel

logger = logging.getLogger(__name__)
ANTHROPIC_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
The structure of the {{block}} object can be found in the instructions; use {"answer": "$your_answer"} as the default structure
if you are not sure about it.

<instructions>
{{instructions}}
</instructions>
"""
class BedrockLargeLanguageModel(LargeLanguageModel):
        logger.info(f"current model id: {model_id} is not supported by the Converse API")
        return None
    def _code_block_mode_wrapper(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Code block mode wrapper for invoking the large language model
        """
        if model_parameters.get("response_format"):
            # Stop on closing code fences so generation ends with the block.
            stop = stop or []
            if "```\n" not in stop:
                stop.append("```\n")
            if "\n```" not in stop:
                stop.append("\n```")
            response_format = model_parameters.pop("response_format")
            format_prompt = SystemPromptMessage(
                content=ANTHROPIC_BLOCK_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content).replace(
                    "{{block}}", response_format
                )
            )
            # Replace an existing system prompt with the block-mode prompt, or prepend one.
            if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
                prompt_messages[0] = format_prompt
            else:
                prompt_messages.insert(0, format_prompt)
            # Prefill the assistant turn with an opening fence to steer the model into the block.
            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
        return self._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
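A hedged usage sketch (the model id, credential shape, and messages are placeholders; `UserPromptMessage` lives in the same `message_entities` module as the other message classes): with `response_format` set, the wrapper rewrites the first system prompt into the block-mode prompt, prefills the assistant turn with an opening fence, and relies on the fence stop sequences to end generation at the closing fence:

```python
from core.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage

llm = BedrockLargeLanguageModel()
result = llm._code_block_mode_wrapper(
    model="anthropic.claude-3-sonnet-20240229-v1:0",  # placeholder model id
    credentials={"aws_region": "us-east-1"},          # assumed credential shape
    prompt_messages=[
        SystemPromptMessage(content="Answer with a JSON object containing an `answer` key."),
        UserPromptMessage(content="What is the capital of France?"),
    ],
    model_parameters={"response_format": "JSON", "max_tokens": 256},
    stream=False,
)
# The completion continues after the prefilled assistant fence and stops at the
# closing fence, so result.message.content holds just the body of the block.
```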
    def _invoke(
        self,
        model: str,