Sfoglia il codice sorgente

azure openai add gpt-4-1106-preview, gpt-4-vision-preview models (#1751)

Co-authored-by: luowei <[REDACTED — the original trailer exposed a GitLab personal access token (glpat-…); it should be revoked and must not be republished]>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
tags/0.3.34
Charlie.Wei 1 anno fa
parent
commit
b0d8d196e1
Nessun account collegato all'indirizzo email del committer

+ 2
- 1
api/core/model_providers/models/llm/azure_openai_model.py Vedi File

@@ -23,7 +23,8 @@ FUNCTION_CALL_MODELS = [
'gpt-4',
'gpt-4-32k',
'gpt-35-turbo',
'gpt-35-turbo-16k'
'gpt-35-turbo-16k',
'gpt-4-1106-preview'
]

class AzureOpenAIModel(BaseLLM):

+ 30
- 0
api/core/model_providers/providers/azure_openai_provider.py Vedi File

@@ -122,6 +122,22 @@ class AzureOpenAIProvider(BaseModelProvider):
ModelFeature.AGENT_THOUGHT.value
]
},
{
'id': 'gpt-4-1106-preview',
'name': 'gpt-4-1106-preview',
'mode': ModelMode.CHAT.value,
'features': [
ModelFeature.AGENT_THOUGHT.value
]
},
{
'id': 'gpt-4-vision-preview',
'name': 'gpt-4-vision-preview',
'mode': ModelMode.CHAT.value,
'features': [
ModelFeature.VISION.value
]
},
{
'id': 'text-davinci-003',
'name': 'text-davinci-003',
@@ -171,6 +187,8 @@ class AzureOpenAIProvider(BaseModelProvider):
base_model_max_tokens = {
'gpt-4': 8192,
'gpt-4-32k': 32768,
'gpt-4-1106-preview': 4096,
'gpt-4-vision-preview': 4096,
'gpt-35-turbo': 4096,
'gpt-35-turbo-16k': 16384,
'text-davinci-003': 4097,
@@ -376,6 +394,18 @@ class AzureOpenAIProvider(BaseModelProvider):
provider_credentials=credentials
)

self._add_provider_model(
model_name='gpt-4-1106-preview',
model_type=ModelType.TEXT_GENERATION,
provider_credentials=credentials
)

self._add_provider_model(
model_name='gpt-4-vision-preview',
model_type=ModelType.TEXT_GENERATION,
provider_credentials=credentials
)

self._add_provider_model(
model_name='text-davinci-003',
model_type=ModelType.TEXT_GENERATION,

+ 12
- 0
api/core/model_providers/rules/azure_openai.json Vedi File

@@ -21,6 +21,18 @@
"unit": "0.001",
"currency": "USD"
},
"gpt-4-1106-preview": {
"prompt": "0.01",
"completion": "0.03",
"unit": "0.001",
"currency": "USD"
},
"gpt-4-vision-preview": {
"prompt": "0.01",
"completion": "0.03",
"unit": "0.001",
"currency": "USD"
},
"gpt-35-turbo": {
"prompt": "0.002",
"completion": "0.0015",

+ 60
- 10
api/core/third_party/langchain/llms/azure_chat_open_ai.py Vedi File

@@ -1,11 +1,13 @@
from typing import Dict, Any, Optional, List, Tuple, Union
from typing import Dict, Any, Optional, List, Tuple, Union, cast

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.chat_models import AzureChatOpenAI
from langchain.chat_models.openai import _convert_dict_to_message
from langchain.schema import ChatResult, BaseMessage, ChatGeneration
from pydantic import root_validator

from langchain.schema import ChatResult, BaseMessage, ChatGeneration, ChatMessage, HumanMessage, AIMessage, SystemMessage, FunctionMessage
from core.model_providers.models.entity.message import LCHumanMessageWithFiles, PromptMessageFileType, ImagePromptMessageFile


class EnhanceAzureChatOpenAI(AzureChatOpenAI):
request_timeout: Optional[Union[float, Tuple[float, float]]] = (5.0, 300.0)
@@ -51,13 +53,18 @@ class EnhanceAzureChatOpenAI(AzureChatOpenAI):
}

def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
message_dicts, params = self._create_message_dicts(messages, stop)
params = self._client_params
if stop is not None:
if "stop" in params:
raise ValueError("`stop` found in both the input and default params.")
params["stop"] = stop
message_dicts = [self._convert_message_to_dict(m) for m in messages]
params = {**params, **kwargs}
if self.streaming:
inner_completion = ""
@@ -65,7 +72,7 @@ class EnhanceAzureChatOpenAI(AzureChatOpenAI):
params["stream"] = True
function_call: Optional[dict] = None
for stream_resp in self.completion_with_retry(
messages=message_dicts, **params
messages=message_dicts, **params
):
if len(stream_resp["choices"]) > 0:
role = stream_resp["choices"][0]["delta"].get("role", role)
@@ -88,4 +95,47 @@ class EnhanceAzureChatOpenAI(AzureChatOpenAI):
)
return ChatResult(generations=[ChatGeneration(message=message)])
response = self.completion_with_retry(messages=message_dicts, **params)
return self._create_chat_result(response)
return self._create_chat_result(response)

def _convert_message_to_dict(self, message: BaseMessage) -> dict:
    """Serialize a LangChain message into the OpenAI chat-completion dict format.

    Mirrors langchain's built-in converter, but additionally supports
    LCHumanMessageWithFiles: its image attachments are emitted as
    vision-style multi-part content (``image_url`` parts alongside the text).

    Raises:
        ValueError: if the message is of an unrecognized type.
    """
    # NOTE: LCHumanMessageWithFiles must be tested before HumanMessage —
    # it is the more specific human-message variant carrying attachments.
    if isinstance(message, ChatMessage):
        result = {"role": message.role, "content": message.content}
    elif isinstance(message, LCHumanMessageWithFiles):
        parts = [{"type": "text", "text": message.content}]
        for attachment in message.files:
            # Only image attachments are forwarded; other file types are ignored.
            if attachment.type != PromptMessageFileType.IMAGE:
                continue
            image = cast(ImagePromptMessageFile, attachment)
            parts.append({
                "type": "image_url",
                "image_url": {
                    "url": image.data,
                    "detail": image.detail.value,
                },
            })
        result = {"role": "user", "content": parts}
    elif isinstance(message, HumanMessage):
        result = {"role": "user", "content": message.content}
    elif isinstance(message, AIMessage):
        result = {"role": "assistant", "content": message.content}
        if "function_call" in message.additional_kwargs:
            result["function_call"] = message.additional_kwargs["function_call"]
    elif isinstance(message, SystemMessage):
        result = {"role": "system", "content": message.content}
    elif isinstance(message, FunctionMessage):
        result = {
            "role": "function",
            "content": message.content,
            "name": message.name,
        }
    else:
        raise ValueError(f"Got unknown type {message}")
    # An explicit per-message name override applies regardless of role.
    if "name" in message.additional_kwargs:
        result["name"] = message.additional_kwargs["name"]
    return result

+ 1
- 1
web/i18n/lang/common.en.ts Vedi File

@@ -65,7 +65,7 @@ const translation = {
'How much to penalize new tokens based on their existing frequency in the text so far.\nDecreases the model\'s likelihood to repeat the same line verbatim.',
max_tokens: 'Max token',
max_tokensTip:
'Used to limit the maximum length of the reply, in tokens. \nLarger values may limit the space left for prompt words, chat logs, and Knowledge. \nIt is recommended to set it below two-thirds.',
'Used to limit the maximum length of the reply, in tokens. \nLarger values may limit the space left for prompt words, chat logs, and Knowledge. \nIt is recommended to set it below two-thirds\ngpt-4-1106-preview, gpt-4-vision-preview max token (input 128k output 4k)',
maxTokenSettingTip: 'Your max token setting is high, potentially limiting space for prompts, queries, and data. Consider setting it below 2/3.',
setToCurrentModelMaxTokenTip: 'Max token is updated to the 80% maximum token of the current model {{maxToken}}.',
stop_sequences: 'Stop sequences',

+ 1
- 1
web/i18n/lang/common.zh.ts Vedi File

@@ -65,7 +65,7 @@ const translation = {
'影响常见与罕见词汇使用。\n值较大时,倾向于生成不常见的词汇和表达方式。\n值越小,更倾向于使用常见和普遍接受的词汇或短语。',
max_tokens: '单次回复限制 max_tokens',
max_tokensTip:
'用于限制回复的最大长度,以 token 为单位。\n较大的值可能会限制给提示词、聊天记录和知识库留出的空间。\n建议将其设置在三分之二以下。',
'用于限制回复的最大长度,以 token 为单位。\n较大的值可能会限制给提示词、聊天记录和知识库留出的空间。\n建议将其设置在三分之二以下。\ngpt-4-1106-preview、gpt-4-vision-preview 最大长度 (输入128k,输出4k)',
maxTokenSettingTip: '您设置的最大 tokens 数较大,可能会导致 prompt、用户问题、知识库内容没有 token 空间进行处理,建议设置到 2/3 以下。',
setToCurrentModelMaxTokenTip: '最大令牌数更新为当前模型最大的令牌数 {{maxToken}} 的 80%。',
stop_sequences: '停止序列 stop_sequences',

+ 1
- 1
web/package.json Vedi File

@@ -98,7 +98,7 @@
"@types/sortablejs": "^1.15.1",
"autoprefixer": "^10.4.14",
"cross-env": "^7.0.3",
"eslint": "8.36.0",
"eslint": "^8.36.0",
"eslint-config-next": "^13.4.7",
"husky": "^8.0.3",
"lint-staged": "^13.2.2",

+ 705
- 323
web/yarn.lock
File diff soppresso perché troppo grande
Vedi File


Loading…
Annulla
Salva