Co-authored-by: 方程 <fangcheng@oschina.cn>
Two new model definitions are added; they are identical except for the model name.

model: InternVL2-8B
label:
  en_US: InternVL2-8B
model_type: llm
features:
  - vision
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens the model can generate; the upper limit varies by model."
      zh_Hans: "模型可生成的最大 token 个数,不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "Sampling temperature controls the randomness of the output, within the range [0.0, 1.0]. Higher values make the output more random and creative; lower values make it more stable. Adjust either top_p or temperature to suit your needs, and avoid tuning both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内,值越高,输出越随机和创造性;值越低,输出越稳定。建议根据需求调整 top_p 或 temperature 参数,避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "Value range [0.0, 1.0]. The model samples tokens from the top p% of highest-probability candidates; when top_p is 0, this parameter has no effect. Adjust either top_p or temperature to suit your needs, and avoid tuning both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens;当 top_p 为 0 时,此参数无效。建议根据需求调整 top_p 或 temperature 参数,避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "Value range [0, 100]. At each step, the model chooses the next token only from the k highest-probability candidates. Larger values produce more diverse text."
      zh_Hans: "取值范围为 [0,100],限制模型在每一步选择下一个词时,只从概率最高的前 k 个词中选取。数值越大,文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Adjusts how often the generated text repeats itself. Positive values reduce repetition and negative values increase it: with a positive penalty, once a word has appeared in the text, the model lowers the probability of choosing it again."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复,负数增加重复。设置此参数后,如果一个词在文本中已经出现过,模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
model: InternVL2.5-26B
label:
  en_US: InternVL2.5-26B
model_type: llm
features:
  - vision
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens the model can generate; the upper limit varies by model."
      zh_Hans: "模型可生成的最大 token 个数,不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "Sampling temperature controls the randomness of the output, within the range [0.0, 1.0]. Higher values make the output more random and creative; lower values make it more stable. Adjust either top_p or temperature to suit your needs, and avoid tuning both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内,值越高,输出越随机和创造性;值越低,输出越稳定。建议根据需求调整 top_p 或 temperature 参数,避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "Value range [0.0, 1.0]. The model samples tokens from the top p% of highest-probability candidates; when top_p is 0, this parameter has no effect. Adjust either top_p or temperature to suit your needs, and avoid tuning both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens;当 top_p 为 0 时,此参数无效。建议根据需求调整 top_p 或 temperature 参数,避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "Value range [0, 100]. At each step, the model chooses the next token only from the k highest-probability candidates. Larger values produce more diverse text."
      zh_Hans: "取值范围为 [0,100],限制模型在每一步选择下一个词时,只从概率最高的前 k 个词中选取。数值越大,文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Adjusts how often the generated text repeats itself. Positive values reduce repetition and negative values increase it: with a positive penalty, once a word has appeared in the text, the model lowers the probability of choosing it again."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复,负数增加重复。设置此参数后,如果一个词在文本中已经出现过,模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
Both models are registered in the provider's model list:

 - deepseek-coder-33B-instruct-chat
 - deepseek-coder-33B-instruct-completions
 - codegeex4-all-9b
+- InternVL2.5-26B
+- InternVL2-8B
In the LLM implementation, requests move from the per-model serverless URLs to the unified OpenAI-compatible endpoint, with the model name resolved through MODEL_TO_IDENTITY:

         user: Optional[str] = None,
     ) -> Union[LLMResult, Generator]:
         self._add_custom_parameters(credentials, model, model_parameters)
-        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._invoke(
+            GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model),
+            credentials,
+            prompt_messages,
+            model_parameters,
+            tools,
+            stop,
+            stream,
+            user,
+        )

     def validate_credentials(self, model: str, credentials: dict) -> None:
-        self._add_custom_parameters(credentials, None)
-        super().validate_credentials(model, credentials)
+        self._add_custom_parameters(credentials, model, None)
+        super().validate_credentials(GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model), credentials)

-    def _add_custom_parameters(self, credentials: dict, model: Optional[str]) -> None:
+    def _add_custom_parameters(self, credentials: dict, model: Optional[str], model_parameters: dict) -> None:
         if model is None:
             model = "Qwen2-72B-Instruct"

-        model_identity = GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model)
-        credentials["endpoint_url"] = f"https://ai.gitee.com/api/serverless/{model_identity}/"
+        credentials["endpoint_url"] = "https://ai.gitee.com/v1"
         if model.endswith("completions"):
             credentials["mode"] = LLMMode.COMPLETION.value
         else:
             credentials["mode"] = LLMMode.CHAT.value
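The new code routes every model name through GiteeAILargeLanguageModel.MODEL_TO_IDENTITY, whose definition sits outside this hunk. A minimal sketch of the lookup it implies is below; the deepseek entries are assumptions inferred from the model names in the list above, not part of this diff.

# Hypothetical reconstruction of the mapping the diff relies on. Keys are Dify's
# model names; values are the identities the Gitee AI v1 API expects. The two
# deepseek entries are guesses: Dify lists one upstream model as separate chat
# and completions entries, so both plausibly map back to a single identity.
MODEL_TO_IDENTITY: dict[str, str] = {
    "deepseek-coder-33B-instruct-chat": "deepseek-coder-33B-instruct",
    "deepseek-coder-33B-instruct-completions": "deepseek-coder-33B-instruct",
}

def resolve_identity(model: str) -> str:
    # Unmapped models (e.g. InternVL2-8B) fall through unchanged, which is why
    # new models can be added without touching the mapping.
    return MODEL_TO_IDENTITY.get(model, model)

assert resolve_identity("deepseek-coder-33B-instruct-chat") == "deepseek-coder-33B-instruct"
assert resolve_identity("InternVL2-8B") == "InternVL2-8B"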