- #
- # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- import re
- import threading
- import requests
- import torch
- from FlagEmbedding import FlagReranker
- from huggingface_hub import snapshot_download
- import os
- from abc import ABC
- import numpy as np
- from api.utils.file_utils import get_home_cache_dir
- from rag.utils import num_tokens_from_string, truncate
- import json
-
def sigmoid(x):
    """Squash raw scores into the interval [0, 1] with the logistic function.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` instead of the textbook ``1 / (1 + exp(-x))``: the
    textbook form overflows inside ``np.exp`` for large negative inputs and
    emits a RuntimeWarning, whereas this form stays finite for every input.

    Args:
        x: Scalar or array-like of real-valued scores.

    Returns:
        A NumPy scalar (for scalar input) or array with the shape of ``x``.
    """
    x = np.asarray(x)
    # logaddexp(0, -x) == log(1 + exp(-x)) evaluated without overflow.
    return np.exp(-np.logaddexp(0.0, -x))
-
class Base(ABC):
    """Common interface that every rerank backend in this module follows."""

    def __init__(self, key, model_name):
        """Accept a credential and a model identifier; the base stores neither."""

    def similarity(self, query: str, texts: list):
        """Score ``texts`` against ``query``; concrete backends override this.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Please implement encode method!")
-
-
class DefaultRerank(Base):
    """Reranker that scores pairs with a ``FlagReranker`` shared by all instances."""

    # Loaded once and reused; _model_lock serialises the first load.
    _model = None
    _model_lock = threading.Lock()

    def __init__(self, key, model_name, **kwargs):
        """Load the shared model if it is not loaded yet and bind it to ``self``.

        The model is first constructed from a directory under the home cache
        dir. If that raises, the repository is fetched with
        ``snapshot_download`` into the same directory and the model is
        constructed from the returned path.

        Tip: if downloading from HuggingFace is a problem, pointing
        ``HF_ENDPOINT`` at a mirror may help, e.g. on Linux
        ``export HF_ENDPOINT=https://hf-mirror.com``.

        Args:
            key: Not used by this backend.
            model_name: Repository id of the reranker model.
            **kwargs: Accepted and ignored.
        """

        def cache_path():
            # A leading "<letters>/" prefix is removed from the repo id.
            return os.path.join(
                get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)
            )

        cls = DefaultRerank
        if not cls._model:
            with cls._model_lock:
                # Checked again under the lock in case another thread loaded it.
                if not cls._model:
                    try:
                        cls._model = FlagReranker(
                            cache_path(), use_fp16=torch.cuda.is_available()
                        )
                    except Exception:
                        model_dir = snapshot_download(
                            repo_id=model_name,
                            local_dir=cache_path(),
                            local_dir_use_symlinks=False,
                        )
                        cls._model = FlagReranker(
                            model_dir, use_fp16=torch.cuda.is_available()
                        )
        self._model = cls._model

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages; each is truncated with
                ``truncate(t, 2048)`` before scoring.

        Returns:
            ``(scores, token_count)`` where ``scores`` is an array of
            sigmoid-transformed model scores in the order of ``texts`` and
            ``token_count`` is the summed token count of the truncated texts.
        """
        shortened = [truncate(t, 2048) for t in texts]
        token_count = sum(num_tokens_from_string(t) for t in shortened)
        pairs = [(query, t) for t in shortened]

        batch_size = 4096
        res = []
        for start in range(0, len(pairs), batch_size):
            raw = self._model.compute_score(
                pairs[start:start + batch_size], max_length=2048
            )
            batch_scores = sigmoid(np.array(raw)).tolist()
            # A one-pair batch comes back as a bare float rather than a list.
            if isinstance(batch_scores, float):
                res.append(batch_scores)
            else:
                res.extend(batch_scores)
        return np.array(res), token_count
-
-
class JinaRerank(Base):
    """Reranker that calls the Jina rerank HTTP API."""

    def __init__(self, key, model_name="jina-reranker-v1-base-en",
                 base_url="https://api.jina.ai/v1/rerank"):
        """Store the endpoint, auth headers and model name.

        Args:
            key: API key sent as a Bearer token.
            model_name: Name of the rerank model to request.
            base_url: Accepted for interface compatibility; the endpoint is
                fixed to the public Jina URL regardless of this value.
        """
        # NOTE(review): base_url is ignored on purpose or by accident — confirm
        # with callers before honouring it.
        self.base_url = "https://api.jina.ai/v1/rerank"
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}"
        }
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages; each is truncated with
                ``truncate(t, 8196)`` before being sent.

        Returns:
            ``(scores, total_tokens)`` where ``scores[i]`` is the relevance
            score of ``texts[i]`` and ``total_tokens`` is the usage reported
            by the API (0 when ``texts`` is empty and no request is made).
        """
        if len(texts) == 0:
            return np.array([]), 0
        texts = [truncate(t, 8196) for t in texts]
        data = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            "top_n": len(texts)
        }
        res = requests.post(self.base_url, headers=self.headers, json=data).json()
        # Results come back ordered by relevance, so each score is placed at
        # the position of the document it refers to.
        rank = np.zeros(len(texts), dtype=float)
        for d in res["results"]:
            rank[d["index"]] = d["relevance_score"]
        return rank, res["usage"]["total_tokens"]
-
-
class YoudaoRerank(DefaultRerank):
    """Reranker backed by a BCEmbedding ``RerankerModel`` shared by all instances."""

    # Kept separate from DefaultRerank's shared model and lock.
    _model = None
    _model_lock = threading.Lock()

    def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
        """Load the shared BCE model if needed and bind it to ``self``.

        The model is first constructed from a path under the home cache dir;
        if that raises, it is constructed from ``model_name`` with
        ``"maidalun1020"`` replaced by ``"InfiniFlow"``.

        Args:
            key: Not used by this backend.
            model_name: Repository id of the reranker model.
            **kwargs: Accepted and ignored.
        """
        from BCEmbedding import RerankerModel

        cls = YoudaoRerank
        if not cls._model:
            with cls._model_lock:
                # Checked again under the lock in case another thread loaded it.
                if not cls._model:
                    try:
                        print("LOADING BCE...")
                        # NOTE(review): the pattern only strips an all-letter
                        # prefix, so the default "maidalun1020/" (digits) is
                        # kept in the path — confirm that is intended.
                        local_path = os.path.join(
                            get_home_cache_dir(),
                            re.sub(r"^[a-zA-Z]+/", "", model_name),
                        )
                        cls._model = RerankerModel(model_name_or_path=local_path)
                    except Exception:
                        fallback_id = model_name.replace("maidalun1020", "InfiniFlow")
                        cls._model = RerankerModel(model_name_or_path=fallback_id)

        self._model = cls._model

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages; each is truncated to the model's
                ``max_length`` before scoring.

        Returns:
            ``(scores, token_count)`` where ``scores`` is an array of
            sigmoid-transformed model scores in the order of ``texts`` and
            ``token_count`` is the summed token count of the truncated texts.
        """
        pairs = [(query, truncate(t, self._model.max_length)) for t in texts]
        token_count = sum(num_tokens_from_string(passage) for _, passage in pairs)

        batch_size = 32
        res = []
        for start in range(0, len(pairs), batch_size):
            raw = self._model.compute_score(
                pairs[start:start + batch_size], max_length=self._model.max_length
            )
            batch_scores = sigmoid(np.array(raw)).tolist()
            # A one-pair batch comes back as a bare float rather than a list.
            if isinstance(batch_scores, float):
                res.append(batch_scores)
            else:
                res.extend(batch_scores)
        return np.array(res), token_count
-
-
class XInferenceRerank(Base):
    """Reranker that calls a Xinference server's rerank endpoint over HTTP."""

    def __init__(self, key="xxxxxxx", model_name="", base_url=""):
        """Normalise the server URL and store the request settings.

        ``base_url`` may be the server root, the ``.../v1`` prefix, or the
        full ``.../v1/rerank`` route; in every case the stored URL ends in
        ``/v1/rerank``. Plain string handling is used instead of
        ``os.path.join`` because the latter uses the OS path separator.

        Args:
            key: Accepted for interface compatibility; not sent to the server.
            model_name: Name (uid) of the rerank model on the server.
            base_url: Address of the Xinference server.
        """
        base_url = base_url.rstrip("/")
        if not base_url.endswith("/rerank"):
            if not base_url.endswith("/v1"):
                base_url += "/v1"
            base_url += "/rerank"
        self.model_name = model_name
        self.base_url = base_url
        self.headers = {
            "Content-Type": "application/json",
            "accept": "application/json"
        }

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages.

        Returns:
            ``(scores, tokens)`` where ``scores[i]`` is the relevance score of
            ``texts[i]`` and ``tokens`` is the sum of the input and output
            token counts reported by the server (0 for empty ``texts``).
        """
        if len(texts) == 0:
            return np.array([]), 0
        data = {
            "model": self.model_name,
            "query": query,
            "return_documents": "true",
            "return_len": "true",
            "documents": texts
        }
        res = requests.post(self.base_url, headers=self.headers, json=data).json()
        # Place each score at the position of the document it refers to, so
        # the output does not depend on the order of the returned results.
        rank = np.zeros(len(texts), dtype=float)
        for d in res["results"]:
            rank[d["index"]] = d["relevance_score"]
        tokens = res["meta"]["tokens"]
        return rank, tokens["input_tokens"] + tokens["output_tokens"]
-
-
class LocalAIRerank(Base):
    """Placeholder for a LocalAI rerank backend; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the usual constructor arguments without storing them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``; nothing is scored."""
        raise NotImplementedError("The LocalAIRerank has not been implement")
-
-
class NvidiaRerank(Base):
    """Reranker that calls NVIDIA's hosted retrieval reranking API."""

    def __init__(
        self, key, model_name, base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/"
    ):
        """Resolve the endpoint for the requested model and build auth headers.

        Args:
            key: API key sent as a Bearer token.
            model_name: One of the model ids handled below; other ids keep
                ``base_url`` unchanged as the endpoint.
            base_url: Root of the retrieval API; a falsy value selects the
                public default.
        """
        if not base_url:
            base_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/"
        self.model_name = model_name

        # URLs are joined with "/" explicitly; os.path.join would use the OS
        # path separator.
        root = base_url.rstrip("/")
        # Always define the attribute so an unrecognised model does not leave
        # the instance half-initialised.
        # NOTE(review): the correct route for other models is not known here.
        self.base_url = base_url

        if self.model_name == "nvidia/nv-rerankqa-mistral-4b-v3":
            self.base_url = f"{root}/nv-rerankqa-mistral-4b-v3/reranking"
        elif self.model_name == "nvidia/rerank-qa-mistral-4b":
            self.base_url = f"{root}/reranking"
            # This model is addressed by a different name in the request body.
            self.model_name = "nv-rerank-qa-mistral-4b:1"

        self.headers = {
            "accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages.

        Returns:
            ``(scores, token_count)`` where ``scores[i]`` is the logit for
            ``texts[i]`` and ``token_count`` is a local count over the query
            and all texts (0 when ``texts`` is empty and no request is made).
        """
        if len(texts) == 0:
            return np.array([]), 0
        token_count = num_tokens_from_string(query) + sum(
            num_tokens_from_string(t) for t in texts
        )
        data = {
            "model": self.model_name,
            "query": {"text": query},
            "passages": [{"text": text} for text in texts],
            "truncate": "END",
            "top_n": len(texts),
        }
        res = requests.post(self.base_url, headers=self.headers, json=data).json()
        # Each ranking carries the index of the passage it scores; write the
        # logit at that index so the output lines up with `texts`.
        rank = np.zeros(len(texts), dtype=float)
        for d in res["rankings"]:
            rank[d["index"]] = d["logit"]
        return rank, token_count
-
-
class LmStudioRerank(Base):
    """Placeholder for an LM Studio rerank backend; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the usual constructor arguments without storing them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``; nothing is scored."""
        raise NotImplementedError("The LmStudioRerank has not been implement")
-
-
class OpenAI_APIRerank(Base):
    """Placeholder for an OpenAI-API-compatible rerank backend; not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the usual constructor arguments without storing them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``; nothing is scored."""
        raise NotImplementedError("The api has not been implement")
-
-
class CoHereRerank(Base):
    """Reranker that uses the Cohere SDK client."""

    def __init__(self, key, model_name, base_url=None):
        """Create the Cohere client.

        Args:
            key: Cohere API key.
            model_name: Name of the rerank model to request.
            base_url: Accepted for interface compatibility; not used.
        """
        from cohere import Client

        self.client = Client(api_key=key)
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages.

        Returns:
            ``(scores, token_count)`` where ``scores[i]`` is the relevance
            score of ``texts[i]`` and ``token_count`` is a local count over
            the query and all texts (0 when ``texts`` is empty and no request
            is made).
        """
        if len(texts) == 0:
            return np.array([]), 0
        token_count = num_tokens_from_string(query) + sum(
            num_tokens_from_string(t) for t in texts
        )
        res = self.client.rerank(
            model=self.model_name,
            query=query,
            documents=texts,
            top_n=len(texts),
            return_documents=False,
        )
        # Each result names the document it scores via `index`; write the
        # score there so the output lines up with `texts`.
        rank = np.zeros(len(texts), dtype=float)
        for d in res.results:
            rank[d.index] = d.relevance_score
        return rank, token_count
-
-
class TogetherAIRerank(Base):
    """Placeholder for a Together AI rerank backend; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the usual constructor arguments without storing them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``; nothing is scored."""
        raise NotImplementedError("The api has not been implement")
-
-
class SILICONFLOWRerank(Base):
    """Reranker that calls the SiliconFlow rerank HTTP API."""

    def __init__(
        self, key, model_name, base_url="https://api.siliconflow.cn/v1/rerank"
    ):
        """Store the endpoint, model name and auth headers.

        Args:
            key: API key sent as a Bearer token.
            model_name: Name of the rerank model to request.
            base_url: Rerank endpoint; a falsy value selects the public default.
        """
        if not base_url:
            base_url = "https://api.siliconflow.cn/v1/rerank"
        self.model_name = model_name
        self.base_url = base_url
        self.headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages.

        Returns:
            ``(scores, tokens)`` where ``scores[i]`` is the relevance score of
            ``texts[i]`` and ``tokens`` is the sum of the input and output
            token counts reported by the API (0 for empty ``texts``).
        """
        if len(texts) == 0:
            return np.array([]), 0
        payload = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            "top_n": len(texts),
            "return_documents": False,
            "max_chunks_per_doc": 1024,
            "overlap_tokens": 80,
        }
        response = requests.post(
            self.base_url, json=payload, headers=self.headers
        ).json()
        # Each result names the document it scores via "index"; write the
        # score there so the output lines up with `texts`.
        rank = np.zeros(len(texts), dtype=float)
        for d in response["results"]:
            rank[d["index"]] = d["relevance_score"]
        tokens = response["meta"]["tokens"]
        return rank, tokens["input_tokens"] + tokens["output_tokens"]
-
-
class BaiduYiyanRerank(Base):
    """Reranker that uses the Qianfan SDK ``Reranker`` resource."""

    def __init__(self, key, model_name, base_url=None):
        """Create the Qianfan reranker client.

        Args:
            key: JSON string; its ``yiyan_ak`` and ``yiyan_sk`` entries are
                passed to the client (each defaults to an empty string).
            model_name: Name of the rerank model to request.
            base_url: Accepted for interface compatibility; not used.
        """
        from qianfan.resources import Reranker

        key = json.loads(key)
        ak = key.get("yiyan_ak", "")
        sk = key.get("yiyan_sk", "")
        self.client = Reranker(ak=ak, sk=sk)
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages.

        Returns:
            ``(scores, total_tokens)`` where ``scores[i]`` is the relevance
            score of ``texts[i]`` and ``total_tokens`` is the usage reported
            in the response (0 when ``texts`` is empty and no request is made).
        """
        if len(texts) == 0:
            return np.array([]), 0
        res = self.client.do(
            model=self.model_name,
            query=query,
            documents=texts,
            top_n=len(texts),
        ).body
        # Each result names the document it scores via "index"; write the
        # score there so the output lines up with `texts`.
        rank = np.zeros(len(texts), dtype=float)
        for d in res["results"]:
            rank[d["index"]] = d["relevance_score"]
        return rank, res["usage"]["total_tokens"]
-
-
class VoyageRerank(Base):
    """Reranker that uses the ``voyageai`` SDK client."""

    def __init__(self, key, model_name, base_url=None):
        """Create the Voyage client.

        Args:
            key: Voyage API key.
            model_name: Name of the rerank model to request.
            base_url: Accepted for interface compatibility; not used.
        """
        import voyageai

        self.client = voyageai.Client(api_key=key)
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate passages.

        Returns:
            ``(scores, total_tokens)`` where ``scores[i]`` is the relevance
            score of ``texts[i]`` and ``total_tokens`` is the count reported
            by the client (0 when ``texts`` is empty and no request is made).
        """
        if len(texts) == 0:
            return np.array([]), 0
        res = self.client.rerank(
            query=query, documents=texts, model=self.model_name, top_k=len(texts)
        )
        # Each result names the document it scores via `index`; write the
        # score there so the output lines up with `texts`.
        rank = np.zeros(len(texts), dtype=float)
        for r in res.results:
            rank[r.index] = r.relevance_score
        return rank, res.total_tokens
|