浏览代码

add embedding input type parameter (#8724)

tags/0.9.0
Jyong 1年前
父节点
当前提交
4669eb24be
没有帐户链接到提交者的电子邮件
共有 33 个文件被更改，包括 239 次插入和 35 次删除
  1. 7
    2
      api/core/embedding/cached_embedding.py
  2. 10
    0
      api/core/embedding/embedding_constant.py
  3. 6
    1
      api/core/model_manager.py
  4. 18
    5
      api/core/model_runtime/model_providers/__base/text_embedding_model.py
  5. 7
    1
      api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
  6. 7
    1
      api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
  7. 7
    1
      api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
  8. 7
    1
      api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
  9. 7
    1
      api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
  10. 7
    1
      api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
  11. 7
    1
      api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
  12. 7
    1
      api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
  13. 7
    1
      api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
  14. 7
    1
      api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
  15. 7
    1
      api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
  16. 2
    0
      api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
  17. 7
    1
      api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
  18. 8
    1
      api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
  19. 7
    1
      api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
  20. 7
    1
      api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
  21. 7
    1
      api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
  22. 7
    1
      api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
  23. 8
    1
      api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
  24. 7
    1
      api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
  25. 7
    1
      api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
  26. 7
    1
      api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
  27. 2
    0
      api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
  28. 9
    1
      api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
  29. 7
    1
      api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
  30. 7
    1
      api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
  31. 7
    1
      api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
  32. 8
    1
      api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
  33. 7
    1
      api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py

+ 7
- 2
api/core/embedding/cached_embedding.py 查看文件

import numpy as np import numpy as np
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
for i in range(0, len(embedding_queue_texts), max_chunks): for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks] batch_texts = embedding_queue_texts[i : i + max_chunks]


embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
embedding_result = self._model_instance.invoke_text_embedding(
texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
)


for vector in embedding_result.embeddings: for vector in embedding_result.embeddings:
try: try:
redis_client.expire(embedding_cache_key, 600) redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float")) return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try: try:
embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
embedding_result = self._model_instance.invoke_text_embedding(
texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
)


embedding_results = embedding_result.embeddings[0] embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist() embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()

+ 10
- 0
api/core/embedding/embedding_constant.py 查看文件

from enum import Enum


class EmbeddingInputType(Enum):
"""
Enum for embedding input type.

Passed as the ``input_type`` argument of the text-embedding invoke calls,
whose signatures default to ``DOCUMENT``.
"""

# Sent by cached_embedding.py when embedding a batch of queued texts.
DOCUMENT = "document"
# Sent by cached_embedding.py when embedding a single text.
QUERY = "query"

+ 6
- 1
api/core/model_manager.py 查看文件

from collections.abc import Callable, Generator, Sequence from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast from typing import IO, Optional, Union, cast


from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError from core.errors.error import ProviderTokenNotInitError
tools=tools, tools=tools,
) )


def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
def invoke_text_embedding(
self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
) -> TextEmbeddingResult:
""" """
Invoke large language model Invoke large language model


:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
if not isinstance(self.model_type_instance, TextEmbeddingModel): if not isinstance(self.model_type_instance, TextEmbeddingModel):
credentials=self.credentials, credentials=self.credentials,
texts=texts, texts=texts,
user=user, user=user,
input_type=input_type,
) )


def get_text_embedding_num_tokens(self, texts: list[str]) -> int: def get_text_embedding_num_tokens(self, texts: list[str]) -> int:

+ 18
- 5
api/core/model_runtime/model_providers/__base/text_embedding_model.py 查看文件



from pydantic import ConfigDict from pydantic import ConfigDict


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel from core.model_runtime.model_providers.__base.ai_model import AIModel
model_config = ConfigDict(protected_namespaces=()) model_config = ConfigDict(protected_namespaces=())


def invoke( def invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke large language model
Invoke text embedding model


:param model: model name :param model: model name
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
self.started_at = time.perf_counter() self.started_at = time.perf_counter()


try: try:
return self._invoke(model, credentials, texts, user)
return self._invoke(model, credentials, texts, user, input_type)
except Exception as e: except Exception as e:
raise self._transform_invoke_error(e) raise self._transform_invoke_error(e)


@abstractmethod @abstractmethod
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke large language model
Invoke text embedding model


:param model: model name :param model: model name
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
raise NotImplementedError raise NotImplementedError

+ 7
- 1
api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py 查看文件

import tiktoken import tiktoken
from openai import AzureOpenAI from openai import AzureOpenAI


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError


class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel): class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
base_model_name = credentials["base_model_name"] base_model_name = credentials["base_model_name"]
credentials_kwargs = self._to_credential_kwargs(credentials) credentials_kwargs = self._to_credential_kwargs(credentials)

+ 7
- 1
api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py 查看文件



from requests import post from requests import post


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
api_base: str = "http://api.baichuan-ai.com/v1/embeddings" api_base: str = "http://api.baichuan-ai.com/v1/embeddings"


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py 查看文件

UnknownServiceError, UnknownServiceError,
) )


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (


class BedrockTextEmbeddingModel(TextEmbeddingModel): class BedrockTextEmbeddingModel(TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py 查看文件

import numpy as np import numpy as np
from cohere.core import RequestOptions from cohere.core import RequestOptions


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py 查看文件

import requests import requests
from huggingface_hub import HfApi, InferenceClient from huggingface_hub import HfApi, InferenceClient


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult


class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel): class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
client = InferenceClient(token=credentials["huggingfacehub_api_token"]) client = InferenceClient(token=credentials["huggingfacehub_api_token"])



+ 7
- 1
api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py 查看文件

import time import time
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py 查看文件

from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models from tencentcloud.hunyuan.v20230901 import hunyuan_client, models


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py 查看文件



from requests import post from requests import post


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
return data return data


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py 查看文件

from requests import post from requests import post
from yarl import URL from yarl import URL


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py 查看文件



from requests import post from requests import post


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
api_base: str = "https://api.minimax.chat/v1/embeddings" api_base: str = "https://api.minimax.chat/v1/embeddings"


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py 查看文件



import requests import requests


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
api_base: str = "https://api.mixedbread.ai/v1" api_base: str = "https://api.mixedbread.ai/v1"


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 2
- 0
api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py 查看文件

from nomic import embed from nomic import embed
from nomic import login as nomic_login from nomic import login as nomic_login


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import ( from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage, EmbeddingUsage,
credentials: dict, credentials: dict,
texts: list[str], texts: list[str],
user: Optional[str] = None, user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py 查看文件



from requests import post from requests import post


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
models: list[str] = ["NV-Embed-QA"] models: list[str] = ["NV-Embed-QA"]


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 8
- 1
api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py 查看文件

import numpy as np import numpy as np
import oci import oci


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# get model properties # get model properties

+ 7
- 1
api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py 查看文件

import numpy as np import numpy as np
import requests import requests


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py 查看文件

import tiktoken import tiktoken
from openai import OpenAI from openai import OpenAI


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py 查看文件

import numpy as np import numpy as np
import requests import requests


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py 查看文件

from requests import post from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 8
- 1
api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py 查看文件

import numpy as np import numpy as np
import requests import requests


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """



+ 7
- 1
api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py 查看文件



from replicate import Client as ReplicateClient from replicate import Client as ReplicateClient


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult


class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel): class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30) client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)



+ 7
- 1
api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py 查看文件



import boto3 import boto3


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
return embeddings return embeddings


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py 查看文件

from typing import Optional from typing import Optional


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import ( from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel, OAICompatEmbeddingModel,
super().validate_credentials(model, credentials) super().validate_credentials(model, credentials)


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
self._add_custom_parameters(credentials) self._add_custom_parameters(credentials)
return super()._invoke(model, credentials, texts, user) return super()._invoke(model, credentials, texts, user)

+ 2
- 0
api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py 查看文件

import dashscope import dashscope
import numpy as np import numpy as np


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import ( from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage, EmbeddingUsage,
credentials: dict, credentials: dict,
texts: list[str], texts: list[str],
user: Optional[str] = None, user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 9
- 1
api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py 查看文件

from openai import OpenAI from openai import OpenAI
from tokenizers import Tokenizer from tokenizers import Tokenizer


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError
def _get_tokenizer(self) -> Tokenizer: def _get_tokenizer(self) -> Tokenizer:
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer") return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")


def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
def _invoke(
self,
model: str,
credentials: dict,
texts: list[str],
user: str | None = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model



+ 7
- 1
api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py 查看文件

from google.oauth2 import service_account from google.oauth2 import service_account
from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py 查看文件

from decimal import Decimal from decimal import Decimal
from typing import Optional from typing import Optional


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 7
- 1
api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py 查看文件

import numpy as np import numpy as np
from requests import Response, post from requests import Response, post


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeError from core.model_runtime.errors.invoke import InvokeError
return WenxinTextEmbedding(api_key, secret_key) return WenxinTextEmbedding(api_key, secret_key)


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

+ 8
- 1
api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py 查看文件



from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
server_url = credentials["server_url"] server_url = credentials["server_url"]

+ 7
- 1
api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py 查看文件

import time import time
from typing import Optional from typing import Optional


from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError
""" """


def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model

正在加载...
取消
保存