| @@ -18,6 +18,7 @@ help: | |||
| en_US: https://console.groq.com/ | |||
| supported_model_types: | |||
| - llm | |||
| - speech2text | |||
| configurate_methods: | |||
| - predefined-model | |||
| provider_credential_schema: | |||
| @@ -0,0 +1,26 @@ | |||
| model: llama-3.2-11b-vision-preview | |||
| label: | |||
| zh_Hans: Llama 3.2 11B Vision (Preview) | |||
| en_US: Llama 3.2 11B Vision (Preview) | |||
| model_type: llm | |||
| features: | |||
| - agent-thought | |||
| - vision | |||
| model_properties: | |||
| mode: chat | |||
| context_size: 131072 | |||
| parameter_rules: | |||
| - name: temperature | |||
| use_template: temperature | |||
| - name: top_p | |||
| use_template: top_p | |||
| - name: max_tokens | |||
| use_template: max_tokens | |||
| default: 512 | |||
| min: 1 | |||
| max: 8192 | |||
| pricing: | |||
| input: '0.05' | |||
| output: '0.1' | |||
| unit: '0.000001' | |||
| currency: USD | |||
| @@ -0,0 +1,26 @@ | |||
| model: llama-3.2-90b-vision-preview | |||
| label: | |||
| zh_Hans: Llama 3.2 90B Vision (Preview) | |||
| en_US: Llama 3.2 90B Vision (Preview) | |||
| model_type: llm | |||
| features: | |||
| - agent-thought | |||
| - vision | |||
| model_properties: | |||
| mode: chat | |||
| context_size: 131072 | |||
| parameter_rules: | |||
| - name: temperature | |||
| use_template: temperature | |||
| - name: top_p | |||
| use_template: top_p | |||
| - name: max_tokens | |||
| use_template: max_tokens | |||
| default: 512 | |||
| min: 1 | |||
| max: 8192 | |||
| pricing: | |||
| input: '0.05' | |||
| output: '0.1' | |||
| unit: '0.000001' | |||
| currency: USD | |||
| @@ -0,0 +1,5 @@ | |||
| model: distil-whisper-large-v3-en | |||
| model_type: speech2text | |||
| model_properties: | |||
| file_upload_limit: 1 | |||
| supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm | |||
| @@ -0,0 +1,30 @@ | |||
| from typing import IO, Optional | |||
| from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import OAICompatSpeech2TextModel | |||
class GroqSpeech2TextModel(OAICompatSpeech2TextModel):
    """
    Model class for Groq Speech to text model.

    Thin adapter over the OpenAI-API-compatible speech2text model: before
    delegating to the parent implementation, every entry point overwrites
    ``credentials["endpoint_url"]`` with the Groq endpoint.
    """

    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Invoke speech2text model

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is set in place
        :param file: audio file
        :param user: unique user id
        :return: text for given audio file
        """
        self._add_custom_parameters(credentials)
        # Forward ``user`` as well; it was previously accepted here but
        # dropped before reaching the parent implementation.
        return super()._invoke(model, credentials, file, user=user)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials against the Groq endpoint

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is set in place
        :return: whatever the parent validation returns (annotated as None)
        """
        self._add_custom_parameters(credentials)
        return super().validate_credentials(model, credentials)

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """
        Set the Groq endpoint on the given credentials

        Mutates ``credentials`` in place, replacing any existing
        ``endpoint_url`` value.

        :param credentials: model credentials
        """
        credentials["endpoint_url"] = "https://api.groq.com/openai/v1"
| @@ -0,0 +1,5 @@ | |||
| model: whisper-large-v3-turbo | |||
| model_type: speech2text | |||
| model_properties: | |||
| file_upload_limit: 1 | |||
| supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm | |||
| @@ -0,0 +1,5 @@ | |||
| model: whisper-large-v3 | |||
| model_type: speech2text | |||
| model_properties: | |||
| file_upload_limit: 1 | |||
| supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm | |||