feat: Add response format support for openai compat models (#12240)

Co-authored-by: Gio Gutierrez <giovannygutierrez@gmail.com>
Giovanny Gutiérrez, 10 months ago
tags/0.15.0
commit d7c0bc8c23
20 changed files with 242 additions and 0 deletions
 1. +12 −0  api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
 2. +12 −0  api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
 3. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
 4. +13 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
 5. +13 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
 6. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
 7. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
 8. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
 9. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
10. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
11. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
12. +13 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
13. +13 −0  api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
14. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
15. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
16. +12 −0  api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
17. +13 −0  api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
18. +13 −0  api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
19.  +3 −0  api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
20. +17 −0  api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py

api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
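
This identical 12-line parameter rule is appended to every Groq model YAML in this commit, so only the first hunk is worth reading closely. When a user selects json_object, the OpenAI-compatible runtime (see the llm.py hunk at the end of this diff) turns the bare string into the object form the Chat Completions API expects. A minimal sketch of the resulting request, assuming Groq's OpenAI-compatible endpoint and a placeholder API key:

import requests  # sketch only; Dify's real request assembly lives in llm.py below

payload = {
    "model": "gemma-7b-it",
    "messages": [{"role": "user", "content": "Reply with a JSON object containing a 'joke' key."}],
    # the YAML option "json_object" ends up on the wire as an object, not a string:
    "response_format": {"type": "json_object"},
}
resp = requests.post(
    "https://api.groq.com/openai/v1/chat/completions",   # Groq's OpenAI-compatible endpoint
    headers={"Authorization": "Bearer YOUR_GROQ_API_KEY"},  # placeholder credential
    json=payload,
    timeout=30,
)
print(resp.json()["choices"][0]["message"]["content"])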

api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml (+13 −0)

@@ -6,6 +6,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -19,6 +20,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
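
Six of the Groq models in this diff (the versatile, instant, specdec, and 8192 variants) also gain a multi-tool-call feature flag alongside response_format. In OpenAI-compatible terms this means a single assistant turn may carry several entries in tool_calls; a sketch of that shape, with made-up ids and function names:

# Illustrative OpenAI-style assistant message with two tool calls in one turn,
# which is what the multi-tool-call feature flag advertises. All values are made up.
assistant_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_1",
            "type": "function",
            "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
        },
        {
            "id": "call_2",
            "type": "function",
            "function": {"name": "get_local_time", "arguments": '{"tz": "Europe/Paris"}'},
        },
    ],
}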

api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml (+13 −0)

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml (+12 −0)

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml (+12 −0)

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml (+12 −0)

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml (+12 −0)

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml (+13 −0)

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"

api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml (+13 −0)

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"

api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.20'
   output: '0.20'

api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 4096
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.7'
   output: '0.8'

api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml (+12 −0)

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.59'
   output: '0.79'

api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml (+13 −0)

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'

api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml (+13 −0)

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'

api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml (+3 −0)

@@ -37,6 +37,9 @@ parameter_rules:
     options:
       - text
      - json_object
+      - json_schema
+  - name: json_schema
+    use_template: json_schema
 pricing:
   input: '2.50'
   output: '10.00'
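
gpt-4o additionally gains json_schema as a third response_format option, plus a companion json_schema parameter (via the json_schema template). The value a user supplies there is a JSON string; under OpenAI's structured-outputs convention it parses to a name-plus-schema object. An illustrative value (schema contents are made up):

import json

# Made-up schema purely for illustration; the user pastes the resulting string
# into the new json_schema parameter.
json_schema = json.dumps({
    "name": "weather_report",
    "schema": {
        "type": "object",
        "properties": {
            "city": {"type": "string"},
            "temperature_c": {"type": "number"},
        },
        "required": ["city", "temperature_c"],
        "additionalProperties": False,
    },
})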

api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py (+17 −0)

@@ -332,6 +332,23 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
         if not endpoint_url.endswith("/"):
             endpoint_url += "/"
 
+        response_format = model_parameters.get("response_format")
+        if response_format:
+            if response_format == "json_schema":
+                json_schema = model_parameters.get("json_schema")
+                if not json_schema:
+                    raise ValueError("Must define JSON Schema when the response format is json_schema")
+                try:
+                    schema = json.loads(json_schema)
+                except json.JSONDecodeError:
+                    raise ValueError(f"Invalid json_schema format: {json_schema}")
+                model_parameters.pop("json_schema")
+                model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
+            else:
+                model_parameters["response_format"] = {"type": response_format}
+        elif "json_schema" in model_parameters:
+            del model_parameters["json_schema"]
+
         data = {"model": model, "stream": stream, **model_parameters}
 
         completion_type = LLMMode.value_of(credentials["mode"])
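
The effect of the new block is easiest to see in isolation. Below is a standalone sketch that mirrors its logic; the function name is invented for illustration:

import json

def apply_response_format(model_parameters: dict) -> dict:
    """Mirror of the hunk above: normalize response_format before building the request."""
    response_format = model_parameters.get("response_format")
    if response_format:
        if response_format == "json_schema":
            json_schema = model_parameters.get("json_schema")
            if not json_schema:
                raise ValueError("Must define JSON Schema when the response format is json_schema")
            try:
                schema = json.loads(json_schema)
            except json.JSONDecodeError:
                raise ValueError(f"Invalid json_schema format: {json_schema}")
            model_parameters.pop("json_schema")
            model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
        else:
            model_parameters["response_format"] = {"type": response_format}
    elif "json_schema" in model_parameters:
        # stale schema with no matching format selection: drop it
        del model_parameters["json_schema"]
    return model_parameters

# A plain "json_object" selection becomes the object form the API expects:
print(apply_response_format({"response_format": "json_object"}))
# -> {'response_format': {'type': 'json_object'}}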
