Просмотр исходного кода

add support for PerfXCloud (#1883)

### What problem does this PR solve?

#1853  add support for PerfXCloud

### Type of change


- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
tags/v0.10.0
黄腾 1 год назад
Родитель
Сommit
9a6dc89156
Аккаунт пользователя с таким Email не найден

+ 153
- 1
conf/llm_factories.json Просмотреть файл

@@ -2442,6 +2442,158 @@
"model_type": "chat"
}
]
}
},
{
"name": "PerfXCloud",
"logo": "",
"tags": "LLM,TEXT EMBEDDING",
"status": "1",
"llm": [
{
"llm_name": "deepseek-v2-chat",
"tags": "LLM,CHAT,4k",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "llama3.1:405b",
"tags": "LLM,CHAT,128k",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "Qwen2-72B-Instruct",
"tags": "LLM,CHAT,128k",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "Qwen2-72B-Instruct-GPTQ-Int4",
"tags": "LLM,CHAT,2k",
"max_tokens": 2048,
"model_type": "chat"
},
{
"llm_name": "Qwen2-72B-Instruct-awq-int4",
"tags": "LLM,CHAT,32k",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "Llama3-Chinese_v2",
"tags": "LLM,CHAT,8k",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "Yi-1_5-9B-Chat-16K",
"tags": "LLM,CHAT,16k",
"max_tokens": 16384,
"model_type": "chat"
},
{
"llm_name": "Qwen1.5-72B-Chat-GPTQ-Int4",
"tags": "LLM,CHAT,2k",
"max_tokens": 2048,
"model_type": "chat"
},
{
"llm_name": "Meta-Llama-3.1-8B-Instruct",
"tags": "LLM,CHAT,4k",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "Qwen2-7B-Instruct",
"tags": "LLM,CHAT,32k",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "deepseek-v2-lite-chat",
"tags": "LLM,CHAT,2k",
"max_tokens": 2048,
"model_type": "chat"
},
{
"llm_name": "Qwen2-7B",
"tags": "LLM,CHAT,128k",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "chatglm3-6b",
"tags": "LLM,CHAT,8k",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "Meta-Llama-3-70B-Instruct-GPTQ-Int4",
"tags": "LLM,CHAT,1k",
"max_tokens": 1024,
"model_type": "chat"
},
{
"llm_name": "Meta-Llama-3-8B-Instruct",
"tags": "LLM,CHAT,8k",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "Mistral-7B-Instruct",
"tags": "LLM,CHAT,32k",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "MindChat-Qwen-7B-v2",
"tags": "LLM,CHAT,2k",
"max_tokens": 2048,
"model_type": "chat"
},
{
"llm_name": "phi-2",
"tags": "LLM,CHAT,2k",
"max_tokens": 2048,
"model_type": "chat"
},
{
"llm_name": "SOLAR-10_7B-Instruct",
"tags": "LLM,CHAT,4k",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "Mixtral-8x7B-Instruct-v0.1-GPTQ",
"tags": "LLM,CHAT,32k",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "Qwen1.5-7B",
"tags": "LLM,CHAT,32k",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "BAAI/bge-large-en-v1.5",
"tags": "TEXT EMBEDDING",
"max_tokens": 512,
"model_type": "embedding"
},
{
"llm_name": "BAAI/bge-large-zh-v1.5",
"tags": "TEXT EMBEDDING",
"max_tokens": 1024,
"model_type": "embedding"
},
{
"llm_name": "BAAI/bge-m3",
"tags": "TEXT EMBEDDING",
"max_tokens": 8192,
"model_type": "embedding"
}
]
}
]
}

+ 4
- 2
rag/llm/__init__.py Просмотреть файл

@@ -38,7 +38,8 @@ EmbeddingModel = {
"NVIDIA": NvidiaEmbed,
"LM-Studio": LmStudioEmbed,
"OpenAI-API-Compatible": OpenAI_APIEmbed,
"cohere": CoHereEmbed
"cohere": CoHereEmbed,
"PerfXCloud": PerfXCloudEmbed,
}


@@ -84,7 +85,8 @@ ChatModel = {
"LM-Studio": LmStudioChat,
"OpenAI-API-Compatible": OpenAI_APIChat,
"cohere": CoHereChat,
"LeptonAI": LeptonAIChat
"LeptonAI": LeptonAIChat,
"PerfXCloud": PerfXCloudChat
}



+ 8
- 1
rag/llm/chat_model.py Просмотреть файл

@@ -987,4 +987,11 @@ class LeptonAIChat(Base):
def __init__(self, key, model_name, base_url=None):
if not base_url:
base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
super().__init__(key, model_name, base_url)
super().__init__(key, model_name, base_url)


class PerfXCloudChat(Base):
    """Chat-completion client for the PerfXCloud OpenAI-compatible API.

    Thin wrapper over the shared OpenAI-style ``Base`` chat client that
    only pins the PerfXCloud endpoint as the default base URL.
    """

    def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"):
        # Callers (e.g. the model factory) may pass None/"" explicitly;
        # fall back to the public PerfXCloud endpoint in that case too.
        super().__init__(key, model_name, base_url or "https://cloud.perfxlab.cn/v1")

+ 7
- 0
rag/llm/embedding_model.py Просмотреть файл

@@ -553,3 +553,10 @@ class CoHereEmbed(Base):
return np.array([d for d in res.embeddings.float]), int(
res.meta.billed_units.input_tokens
)


class PerfXCloudEmbed(OpenAIEmbed):
    """Text-embedding client for the PerfXCloud OpenAI-compatible API.

    Reuses ``OpenAIEmbed`` unchanged and only fixes the default
    endpoint to PerfXCloud's public base URL.
    """

    def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"):
        # An explicit None/"" from the caller also resolves to the
        # public PerfXCloud endpoint, mirroring the chat client.
        super().__init__(key, model_name, base_url or "https://cloud.perfxlab.cn/v1")

+ 11
- 0
web/src/assets/svg/llm/perfx-cloud.svg
Разница между файлами не показана из-за своего большого размера
Просмотреть файл


+ 1
- 0
web/src/pages/user-setting/setting-model/constant.ts Просмотреть файл

@@ -25,6 +25,7 @@ export const IconMap = {
'OpenAI-API-Compatible': 'openai-api',
cohere: 'cohere',
Lepton: 'lepton',
PerfXCloud: 'perfx-cloud'
};

export const BedrockRegionList = [

Загрузка…
Отмена
Сохранить