@@ -1,8 +1,20 @@
-- deepseek-v2-chat
-- qwen2-72b-instruct
-- qwen2-57b-a14b-instruct
-- qwen2-7b-instruct
-- yi-1.5-34b-chat
-- yi-1.5-9b-chat
-- yi-1.5-6b-chat
-- glm4-9B-chat
+- Qwen/Qwen2-72B-Instruct
+- Qwen/Qwen2-57B-A14B-Instruct
+- Qwen/Qwen2-7B-Instruct
+- Qwen/Qwen2-1.5B-Instruct
+- 01-ai/Yi-1.5-34B-Chat
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- THUDM/glm-4-9b-chat
+- deepseek-ai/DeepSeek-V2-Chat
+- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- internlm/internlm2_5-7b-chat
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- meta-llama/Meta-Llama-3.1-405B-Instruct
+- meta-llama/Meta-Llama-3.1-70B-Instruct
+- meta-llama/Meta-Llama-3.1-8B-Instruct
+- mistralai/Mixtral-8x7B-Instruct-v0.1
+- mistralai/Mistral-7B-Instruct-v0.2
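(The model list now uses the full `organization/model` identifiers that SiliconFlow's OpenAI-compatible API serves, and picks up the new Gemma 2, InternLM2.5, Llama 3/3.1, Mistral/Mixtral and DeepSeek entries configured in the hunks below.)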
@@ -1,4 +1,4 @@
-model: deepseek-ai/deepseek-v2-chat
+model: deepseek-ai/DeepSeek-V2-Chat
 label:
   en_US: deepseek-ai/DeepSeek-V2-Chat
 model_type: llm
@@ -0,0 +1,30 @@
+model: google/gemma-2-27b-it
+label:
+  en_US: google/gemma-2-27b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
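For reference, the pricing block reads as price-per-token in the given unit: `input: '1.26'` with `unit: '0.000001'` works out to 1.26 RMB per million tokens. A minimal sketch of that arithmetic (the `estimate_cost` helper is ours, for illustration only, not part of this change):

from decimal import Decimal

def estimate_cost(tokens: int, price: str, unit: str) -> Decimal:
    # total = token count x per-token price x unit size
    return Decimal(tokens) * Decimal(price) * Decimal(unit)

# 1M input tokens on google/gemma-2-27b-it at the rates above -> 1.26 RMB
print(estimate_cost(1_000_000, "1.26", "0.000001"))

The same schema applies to every model config below; only `context_size` and the prices vary.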
@@ -0,0 +1,30 @@
+model: google/gemma-2-9b-it
+label:
+  en_US: google/gemma-2-9b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -1,4 +1,4 @@
-model: zhipuai/glm4-9B-chat
+model: THUDM/glm-4-9b-chat
 label:
   en_US: THUDM/glm-4-9b-chat
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.6'
-  output: '0.6'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -0,0 +1,30 @@
+model: internlm/internlm2_5-7b-chat
+label:
+  en_US: internlm/internlm2_5-7b-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-405B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: mistralai/Mistral-7B-Instruct-v0.2
+label:
+  en_US: mistralai/Mistral-7B-Instruct-v0.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: mistralai/Mixtral-8x7B-Instruct-v0.1
+label:
+  en_US: mistralai/Mixtral-8x7B-Instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-1.5B-Instruct
+label:
+  en_US: Qwen/Qwen2-1.5B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-57B-A14B-Instruct
+model: Qwen/Qwen2-57B-A14B-Instruct
 label:
   en_US: Qwen/Qwen2-57B-A14B-Instruct
 model_type: llm
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-72B-Instruct
+model: Qwen/Qwen2-72B-Instruct
 label:
   en_US: Qwen/Qwen2-72B-Instruct
 model_type: llm
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-7B-Instruct
+model: Qwen/Qwen2-7B-Instruct
 label:
   en_US: Qwen/Qwen2-7B-Instruct
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -1,4 +1,4 @@
-model: 01-ai/Yi-1.5-9B-Chat
+model: 01-ai/Yi-1.5-9B-Chat-16K
 label:
   en_US: 01-ai/Yi-1.5-9B-Chat-16K
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.42'
-  output: '0.42'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -15,6 +15,7 @@ help:
     en_US: https://cloud.siliconflow.cn/keys
 supported_model_types:
   - llm
+  - text-embedding
 configurate_methods:
   - predefined-model
 provider_credential_schema:
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-en-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-zh-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
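(These two configs match the BGE models' 512-token input window via `context_size`; as we read Dify's embedding runtime, `max_chunks: 1` means at most one text is sent per embedding request.)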
@@ -0,0 +1,29 @@
+from typing import Optional
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
+    OAICompatEmbeddingModel,
+)
+
+
+class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
+    """
+    Model class for Siliconflow text embedding model.
+    """
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        self._add_custom_parameters(credentials)
+        super().validate_credentials(model, credentials)
+
+    def _invoke(self, model: str, credentials: dict,
+                texts: list[str], user: Optional[str] = None) \
+            -> TextEmbeddingResult:
+        self._add_custom_parameters(credentials)
+        return super()._invoke(model, credentials, texts, user)
+
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        self._add_custom_parameters(credentials)
+        return super().get_num_tokens(model, credentials, texts)
+
+    @classmethod
+    def _add_custom_parameters(cls, credentials: dict) -> None:
+        credentials['endpoint_url'] = 'https://api.siliconflow.cn/v1'
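The subclass delegates all real work to the OpenAI-compatible base implementation and only pins the endpoint URL before each call, so user credentials need to carry nothing but an `api_key`. A quick sanity check of that behavior, runnable inside the Dify repo (the key value is a placeholder of ours):

from core.model_runtime.model_providers.siliconflow.text_embedding.text_embedding import (
    SiliconflowTextEmbeddingModel,
)

creds = {"api_key": "sk-placeholder"}  # hypothetical key, never a real credential
SiliconflowTextEmbeddingModel._add_custom_parameters(creds)
assert creds["endpoint_url"] == "https://api.siliconflow.cn/v1"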
@@ -0,0 +1,62 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.siliconflow.text_embedding.text_embedding import (
+    SiliconflowTextEmbeddingModel,
+)
+
+
+def test_validate_credentials():
+    model = SiliconflowTextEmbeddingModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model="BAAI/bge-large-zh-v1.5",
+            credentials={
+                "api_key": "invalid_key"
+            },
+        )
+
+    model.validate_credentials(
+        model="BAAI/bge-large-zh-v1.5",
+        credentials={
+            "api_key": os.environ.get("API_KEY"),
+        },
+    )
+
+
+def test_invoke_model():
+    model = SiliconflowTextEmbeddingModel()
+
+    result = model.invoke(
+        model="BAAI/bge-large-zh-v1.5",
+        credentials={
+            "api_key": os.environ.get("API_KEY"),
+        },
+        texts=[
+            "hello",
+            "world",
+        ],
+        user="abc-123",
+    )
+
+    assert isinstance(result, TextEmbeddingResult)
+    assert len(result.embeddings) == 2
+    assert result.usage.total_tokens == 6
+
+
+def test_get_num_tokens():
+    model = SiliconflowTextEmbeddingModel()
+
+    num_tokens = model.get_num_tokens(
+        model="BAAI/bge-large-zh-v1.5",
+        credentials={
+            "api_key": os.environ.get("API_KEY"),
+        },
+        texts=["hello", "world"],
+    )
+
+    assert num_tokens == 2
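Note that these are live integration tests: they read the SiliconFlow key from the `API_KEY` environment variable, so export a valid key before running the file with pytest; without it, everything past the invalid-key assertion will fail.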