Переглянути джерело

Feat: add model provider DeepInfra (#9003)

### What problem does this PR solve?

Add DeepInfra as a model provider. The model list was contributed by our community.

NOTE: most endpoints haven't been tested, but they are expected to behave
the same way as the corresponding OpenAI endpoints.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
tags/v0.20.0
Yongteng Lei 3 місяці тому
джерело
коміт
7ebc1f0943
Аккаунт користувача з таким Email не знайдено

+ 293
- 0
conf/llm_factories.json Переглянути файл

@@ -3830,6 +3830,299 @@
"tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,TEXT RE-RANK",
"status": "1",
"llm": []
},
{
"name": "DeepInfra",
"logo": "",
"tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,MODERATION",
"status": "1",
"llm": [
{
"llm_name": "moonshotai/Kimi-K2-Instruct",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "mistralai/Voxtral-Small-24B-2507",
"tags": "SPEECH2TEXT",
"model_type": "speech2text"
},
{
"llm_name": "mistralai/Voxtral-Mini-3B-2507",
"tags": "SPEECH2TEXT",
"model_type": "speech2text"
},
{
"llm_name": "deepseek-ai/DeepSeek-R1-0528-Turbo",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "Qwen/Qwen3-235B-A22B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "Qwen/Qwen3-30B-A3B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "Qwen/Qwen3-32B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "Qwen/Qwen3-14B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "deepseek-ai/DeepSeek-V3-0324-Turbo",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "deepseek-ai/DeepSeek-R1-0528",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "deepseek-ai/DeepSeek-V3-0324",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "mistralai/Devstral-Small-2507",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "meta-llama/Llama-Guard-4-12B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "Qwen/QwQ-32B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "anthropic/claude-4-opus",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "anthropic/claude-4-sonnet",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "google/gemini-2.5-flash",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "google/gemini-2.5-pro",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "google/gemma-3-27b-it",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "google/gemma-3-12b-it",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "google/gemma-3-4b-it",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "hexgrad/Kokoro-82M",
"tags": "TTS",
"model_type": "tts"
},
{
"llm_name": "canopylabs/orpheus-3b-0.1-ft",
"tags": "TTS",
"model_type": "tts"
},
{
"llm_name": "sesame/csm-1b",
"tags": "TTS",
"model_type": "tts"
},
{
"llm_name": "microsoft/Phi-4-multimodal-instruct",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "deepseek-ai/DeepSeek-V3",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "meta-llama/Llama-3.3-70B-Instruct",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "microsoft/phi-4",
"tags": "LLM,CHAT",
"model_type": "chat"
},
{
"llm_name": "openai/whisper-large-v3-turbo",
"tags": "SPEECH2TEXT",
"model_type": "speech2text"
},
{
"llm_name": "BAAI/bge-base-en-v1.5",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "BAAI/bge-en-icl",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "BAAI/bge-large-en-v1.5",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "BAAI/bge-m3",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "BAAI/bge-m3-multi",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "Qwen/Qwen3-Embedding-0.6B",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "Qwen/Qwen3-Embedding-4B",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "Qwen/Qwen3-Embedding-8B",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "intfloat/e5-base-v2",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "intfloat/e5-large-v2",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "intfloat/multilingual-e5-large",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "intfloat/multilingual-e5-large-instruct",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/all-MiniLM-L12-v2",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/all-MiniLM-L6-v2",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/all-mpnet-base-v2",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/clip-ViT-B-32",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/clip-ViT-B-32-multilingual-v1",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "sentence-transformers/paraphrase-MiniLM-L6-v2",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "shibing624/text2vec-base-chinese",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "thenlper/gte-base",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
},
{
"llm_name": "thenlper/gte-large",
"tags": "TEXT EMBEDDING",
"model_type": "embedding"
}
]
}
]
}

+ 1
- 0
docs/references/supported_models.mdx Переглянути файл

@@ -62,6 +62,7 @@ A complete list of models supported by RAGFlow, which will continue to expand.
| Youdao | | :heavy_check_mark: | :heavy_check_mark: | | | |
| ZHIPU-AI | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | | |
| 01.AI | :heavy_check_mark: | | | | | |
| DeepInfra | :heavy_check_mark: | :heavy_check_mark: | | | :heavy_check_mark: | :heavy_check_mark: |

```mdx-code-block
</APITable>

+ 9
- 0
rag/llm/chat_model.py Переглянути файл

@@ -1682,3 +1682,12 @@ class GPUStackChat(Base):
raise ValueError("Local llm url cannot be None")
base_url = urljoin(base_url, "v1")
super().__init__(key, model_name, base_url, **kwargs)


class DeepInfraChat(Base):
    """Chat model client for the DeepInfra provider.

    All behavior is inherited from ``Base``; this subclass only supplies the
    provider name and a default endpoint.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        """Initialise the client.

        Args:
            key: API key forwarded to ``Base.__init__``.
            model_name: Model identifier forwarded to ``Base.__init__``.
            base_url: Endpoint URL; any falsy value (``None``, ``""``) is
                replaced by the default DeepInfra ``/v1/openai`` endpoint.
            **kwargs: Passed through to ``Base.__init__`` unchanged.
        """
        # Callers may pass an explicit empty/None URL, so the signature default
        # alone is not enough — fall back here as well.
        endpoint = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, endpoint, **kwargs)

+ 11
- 1
rag/llm/embedding_model.py Переглянути файл

@@ -202,9 +202,10 @@ class QWenEmbed(Base):
self.model_name = model_name

def encode(self, texts: list):
import dashscope
import time

import dashscope

batch_size = 4
res = []
token_count = 0
@@ -900,3 +901,12 @@ class GiteeEmbed(SILICONFLOWEmbed):
if not base_url:
base_url = "https://ai.gitee.com/v1/embeddings"
super().__init__(key, model_name, base_url)


class DeepInfraEmbed(OpenAIEmbed):
    """Embedding model client for the DeepInfra provider.

    Reuses ``OpenAIEmbed`` as-is and only swaps in the provider name and the
    default endpoint.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai"):
        """Initialise the client.

        Args:
            key: API key forwarded to ``OpenAIEmbed.__init__``.
            model_name: Model identifier forwarded to ``OpenAIEmbed.__init__``.
            base_url: Endpoint URL; any falsy value (``None``, ``""``) is
                replaced by the default DeepInfra ``/v1/openai`` endpoint.
        """
        # A falsy URL passed explicitly must still resolve to the default.
        endpoint = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, endpoint)

+ 10
- 0
rag/llm/sequence2txt_model.py Переглянути файл

@@ -208,3 +208,13 @@ class GiteeSeq2txt(Base):
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name


class DeepInfraSeq2txt(Base):
    """Speech-to-text client for the DeepInfra provider.

    Builds an ``OpenAI`` client pointed at the DeepInfra endpoint; the parent
    initialiser is not invoked here.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        """Initialise the client.

        Args:
            key: API key handed to the ``OpenAI`` client.
            model_name: Model identifier stored on the instance.
            base_url: Endpoint URL; any falsy value (``None``, ``""``) is
                replaced by the default DeepInfra ``/v1/openai`` endpoint.
            **kwargs: Accepted for signature compatibility; not used here.
        """
        self.model_name = model_name
        # A falsy URL passed explicitly must still resolve to the default.
        self.client = OpenAI(
            api_key=key,
            base_url=base_url or "https://api.deepinfra.com/v1/openai",
        )

+ 9
- 0
rag/llm/tts_model.py Переглянути файл

@@ -382,3 +382,12 @@ class SILICONFLOWTTS(Base):
for chunk in response.iter_content():
if chunk:
yield chunk


class DeepInfraTTS(OpenAITTS):
    """Text-to-speech client for the DeepInfra provider.

    Reuses ``OpenAITTS`` as-is and only swaps in the provider name and the
    default endpoint.
    """

    _FACTORY_NAME = "DeepInfra"

    def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
        """Initialise the client.

        Args:
            key: API key forwarded to ``OpenAITTS.__init__``.
            model_name: Model identifier forwarded to ``OpenAITTS.__init__``.
            base_url: Endpoint URL; any falsy value (``None``, ``""``) is
                replaced by the default DeepInfra ``/v1/openai`` endpoint.
            **kwargs: Passed through to ``OpenAITTS.__init__`` unchanged.
        """
        # A falsy URL passed explicitly must still resolve to the default.
        endpoint = base_url or "https://api.deepinfra.com/v1/openai"
        super().__init__(key, model_name, endpoint, **kwargs)

+ 1
- 0
web/src/assets/svg/llm/deepinfra.svg Переглянути файл

@@ -0,0 +1 @@
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>DeepInfra</title><path d="M3.294 7.821A2.297 2.297 0 011 5.527a2.297 2.297 0 012.294-2.295A2.297 2.297 0 015.59 5.527 2.297 2.297 0 013.294 7.82zm0-3.688a1.396 1.396 0 000 2.79 1.396 1.396 0 000-2.79zM3.294 14.293A2.297 2.297 0 011 11.998a2.297 2.297 0 012.294-2.294 2.297 2.297 0 012.295 2.294 2.297 2.297 0 01-2.295 2.295zm0-3.688a1.395 1.395 0 000 2.788 1.395 1.395 0 100-2.788zM3.294 20.761A2.297 2.297 0 011 18.467a2.297 2.297 0 012.294-2.295 2.297 2.297 0 012.295 2.295 2.297 2.297 0 01-2.295 2.294zm0-3.688a1.396 1.396 0 000 2.79 1.396 1.396 0 000-2.79zM20.738 7.821a2.297 2.297 0 01-2.295-2.294 2.297 2.297 0 012.294-2.295 2.297 2.297 0 012.295 2.295 2.297 2.297 0 01-2.294 2.294zm0-3.688a1.396 1.396 0 101.395 1.395c0-.77-.626-1.395-1.395-1.395zM20.738 14.293a2.297 2.297 0 01-2.295-2.295 2.297 2.297 0 012.294-2.294 2.297 2.297 0 012.295 2.294 2.297 2.297 0 01-2.294 2.295zm0-3.688c-.769 0-1.395.625-1.395 1.393a1.396 1.396 0 002.79 0c0-.77-.626-1.393-1.395-1.393zM20.738 20.761a2.297 2.297 0 01-2.295-2.294 2.297 2.297 0 012.294-2.295 2.297 2.297 0 012.295 2.295 2.297 2.297 0 01-2.294 2.294zm0-3.688a1.396 1.396 0 101.395 1.395c0-.77-.626-1.395-1.395-1.395zM12.016 11.057a2.297 2.297 0 01-2.294-2.294 2.297 2.297 0 012.294-2.295 2.297 2.297 0 012.295 2.295 2.297 2.297 0 01-2.295 2.294zm0-3.688a1.396 1.396 0 101.395 1.395c0-.77-.625-1.395-1.395-1.395zM12.017 4.589a2.297 2.297 0 01-2.295-2.295A2.297 2.297 0 0112.017 0a2.297 2.297 0 012.294 2.294 2.297 2.297 0 01-2.294 2.295zm0-3.688a1.396 1.396 0 101.395 1.395c0-.77-.626-1.395-1.395-1.395zM12.017 17.529a2.297 2.297 0 01-2.295-2.295 2.297 2.297 0 012.295-2.294 2.297 2.297 0 012.294 2.294 2.297 2.297 0 01-2.294 2.295zm0-3.688a1.396 1.396 0 101.395 1.395c0-.77-.626-1.395-1.395-1.395zM12.016 24a2.297 2.297 0 01-2.294-2.295 2.297 2.297 0 012.294-2.294 2.297 2.297 0 012.295 2.294A2.297 2.297 0 0112.016 
24zm0-3.688a1.396 1.396 0 101.395 1.395c0-.77-.625-1.395-1.395-1.395z" fill="#2A3275"></path><path d="M8.363 8.222a.742.742 0 01-.277-.053l-1.494-.596a.75.75 0 11.557-1.392l1.493.595a.75.75 0 01-.278 1.446h-.001zM8.363 14.566a.743.743 0 01-.277-.053l-1.494-.595a.75.75 0 11.557-1.393l1.493.596a.75.75 0 01-.278 1.445h-.001zM17.124 11.397a.741.741 0 01-.277-.054l-1.493-.595a.75.75 0 11.555-1.392l1.493.595a.75.75 0 01-.278 1.446zM17.124 5.05a.744.744 0 01-.277-.054L15.354 4.4a.75.75 0 01.555-1.392l1.493.596a.75.75 0 01-.278 1.445zM17.124 17.739a.743.743 0 01-.277-.053l-1.494-.596a.75.75 0 11.556-1.392l1.493.596a.75.75 0 01-.278 1.445zM6.91 17.966a.75.75 0 01-.279-1.445l1.494-.595a.749.749 0 11.556 1.392l-1.493.595a.743.743 0 01-.277.053H6.91zM6.91 11.66a.75.75 0 01-.279-1.446l1.494-.595a.75.75 0 01.556 1.392l-1.493.595a.743.743 0 01-.277.053H6.91zM6.91 5.033a.75.75 0 01-.279-1.446l1.494-.595a.75.75 0 01.556 1.392l-1.493.596a.744.744 0 01-.277.053H6.91zM8.363 21.364a.743.743 0 01-.277-.053l-1.494-.596a.75.75 0 01.555-1.392l1.494.595a.75.75 0 01-.278 1.446zM15.63 8.223a.75.75 0 01-.278-1.447l1.494-.595a.75.75 0 01.556 1.393l-1.494.595a.744.744 0 01-.276.054h-.002zM15.63 14.567a.75.75 0 01-.278-1.446l1.494-.596a.75.75 0 01.556 1.394l-1.494.595a.743.743 0 01-.276.053h-.002zM15.63 21.363a.749.749 0 01-.278-1.445l1.494-.595a.75.75 0 11.555 1.392l-1.494.595a.741.741 0 01-.277.053z" fill="#5699DB"></path></svg>

+ 2
- 0
web/src/constants/llm.ts Переглянути файл

@@ -50,6 +50,7 @@ export enum LLMFactory {
GPUStack = 'GPUStack',
VLLM = 'VLLM',
GiteeAI = 'GiteeAI',
DeepInfra = 'DeepInfra',
}

// Please lowercase the file name
@@ -105,4 +106,5 @@ export const IconMap = {
[LLMFactory.GPUStack]: 'gpustack',
[LLMFactory.VLLM]: 'vllm',
[LLMFactory.GiteeAI]: 'gitee-ai',
[LLMFactory.DeepInfra]: 'deepinfra',
};

Завантаження…
Відмінити
Зберегти