Browse Source

Added Llama 3.2 Vision Models and Speech2Text Models for Groq (#9479)

tags/0.10.0
Tao Wang 1 year ago
parent
commit
b92504bebc
No account linked to committer's email address

+ 1
- 0
api/core/model_runtime/model_providers/groq/groq.yaml View File

@@ -18,6 +18,7 @@ help:
en_US: https://console.groq.com/
supported_model_types:
- llm
- speech2text
configurate_methods:
- predefined-model
provider_credential_schema:

+ 26
- 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml View File

@@ -0,0 +1,26 @@
# Predefined-model definition for the Groq-hosted Llama 3.2 11B Vision (Preview) LLM.
model: llama-3.2-11b-vision-preview
label:
zh_Hans: Llama 3.2 11B Vision (Preview)
en_US: Llama 3.2 11B Vision (Preview)
model_type: llm
# Capabilities advertised for this model.
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
# Tunable request parameters; each one starts from a shared template of the same name.
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
# Values set here on top of the max_tokens template: default 512, allowed range 1-8192.
default: 512
min: 1
max: 8192
# NOTE(review): with unit '0.000001' the input/output prices are presumably
# quoted per 1M tokens — confirm against the pricing schema.
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

+ 26
- 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml View File

@@ -0,0 +1,26 @@
# Predefined-model definition for the Groq-hosted Llama 3.2 90B Vision (Preview) LLM.
model: llama-3.2-90b-vision-preview
label:
zh_Hans: Llama 3.2 90B Vision (Preview)
en_US: Llama 3.2 90B Vision (Preview)
model_type: llm
# Capabilities advertised for this model.
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
# Tunable request parameters; each one starts from a shared template of the same name.
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
# Values set here on top of the max_tokens template: default 512, allowed range 1-8192.
default: 512
min: 1
max: 8192
# NOTE(review): these prices are identical to the 11B vision config's values
# ('0.05' / '0.1') — confirm against Groq's published pricing for the 90B model.
# The unit '0.000001' presumably means prices are quoted per 1M tokens — confirm.
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

+ 0
- 0
api/core/model_runtime/model_providers/groq/speech2text/__init__.py View File


+ 5
- 0
api/core/model_runtime/model_providers/groq/speech2text/distil-whisper-large-v3-en.yaml View File

@@ -0,0 +1,5 @@
# Speech-to-text model definition for Groq's distil-whisper-large-v3-en.
model: distil-whisper-large-v3-en
model_type: speech2text
model_properties:
# NOTE(review): presumably an upload size limit in MB — confirm against the speech2text model schema.
file_upload_limit: 1
# Comma-separated list of accepted file extensions.
supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm

+ 30
- 0
api/core/model_runtime/model_providers/groq/speech2text/speech2text.py View File

@@ -0,0 +1,30 @@
from typing import IO, Optional

from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import OAICompatSpeech2TextModel


class GroqSpeech2TextModel(OAICompatSpeech2TextModel):
    """
    Model class for Groq Speech to text model.

    Thin wrapper over the OpenAI-API-compatible speech2text model: every
    entry point first pins ``credentials["endpoint_url"]`` to Groq's
    OpenAI-compatible base URL and then delegates to the parent class.
    """

    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Invoke speech2text model

        :param model: model name
        :param credentials: model credentials (``endpoint_url`` is overwritten in place)
        :param file: audio file
        :param user: unique user id
        :return: text for given audio file
        """
        self._add_custom_parameters(credentials)
        # Forward ``user`` as well; previously it was accepted here but
        # silently dropped before reaching the parent implementation.
        return super()._invoke(model, credentials, file, user)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate credentials against the Groq endpoint.

        :param model: model name
        :param credentials: model credentials (``endpoint_url`` is overwritten in place)
        """
        self._add_custom_parameters(credentials)
        return super().validate_credentials(model, credentials)

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """
        Set the Groq OpenAI-compatible base URL on the given credentials dict.

        Mutates ``credentials`` in place and returns nothing.

        :param credentials: model credentials
        """
        credentials["endpoint_url"] = "https://api.groq.com/openai/v1"

+ 5
- 0
api/core/model_runtime/model_providers/groq/speech2text/whisper-large-v3-turbo.yaml View File

@@ -0,0 +1,5 @@
# Speech-to-text model definition for Groq's whisper-large-v3-turbo.
model: whisper-large-v3-turbo
model_type: speech2text
model_properties:
# NOTE(review): presumably an upload size limit in MB — confirm against the speech2text model schema.
file_upload_limit: 1
# Comma-separated list of accepted file extensions.
supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm

+ 5
- 0
api/core/model_runtime/model_providers/groq/speech2text/whisper-large-v3.yaml View File

@@ -0,0 +1,5 @@
# Speech-to-text model definition for Groq's whisper-large-v3.
model: whisper-large-v3
model_type: speech2text
model_properties:
# NOTE(review): presumably an upload size limit in MB — confirm against the speech2text model schema.
file_upload_limit: 1
# Comma-separated list of accepted file extensions.
supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm

Loading…
Cancel
Save