Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

rerank_model.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import re
  17. import threading
  18. from urllib.parse import urljoin
  19. import requests
  20. import httpx
  21. from huggingface_hub import snapshot_download
  22. import os
  23. from abc import ABC
  24. import numpy as np
  25. from yarl import URL
  26. from api import settings
  27. from api.utils.file_utils import get_home_cache_dir
  28. from rag.utils import num_tokens_from_string, truncate
  29. import json
  30. def sigmoid(x):
  31. return 1 / (1 + np.exp(-x))
  32. class Base(ABC):
  33. def __init__(self, key, model_name):
  34. pass
  35. def similarity(self, query: str, texts: list):
  36. raise NotImplementedError("Please implement encode method!")
  37. class DefaultRerank(Base):
  38. _model = None
  39. _model_lock = threading.Lock()
  40. def __init__(self, key, model_name, **kwargs):
  41. """
  42. If you have trouble downloading HuggingFace models, -_^ this might help!!
  43. For Linux:
  44. export HF_ENDPOINT=https://hf-mirror.com
  45. For Windows:
  46. Good luck
  47. ^_-
  48. """
  49. if not settings.LIGHTEN and not DefaultRerank._model:
  50. import torch
  51. from FlagEmbedding import FlagReranker
  52. with DefaultRerank._model_lock:
  53. if not DefaultRerank._model:
  54. try:
  55. DefaultRerank._model = FlagReranker(
  56. os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z0-9]+/", "", model_name)),
  57. use_fp16=torch.cuda.is_available())
  58. except Exception:
  59. model_dir = snapshot_download(repo_id=model_name,
  60. local_dir=os.path.join(get_home_cache_dir(),
  61. re.sub(r"^[a-zA-Z0-9]+/", "", model_name)),
  62. local_dir_use_symlinks=False)
  63. DefaultRerank._model = FlagReranker(model_dir, use_fp16=torch.cuda.is_available())
  64. self._model = DefaultRerank._model
  65. def similarity(self, query: str, texts: list):
  66. pairs = [(query, truncate(t, 2048)) for t in texts]
  67. token_count = 0
  68. for _, t in pairs:
  69. token_count += num_tokens_from_string(t)
  70. batch_size = 4096
  71. res = []
  72. for i in range(0, len(pairs), batch_size):
  73. scores = self._model.compute_score(pairs[i:i + batch_size], max_length=2048)
  74. scores = sigmoid(np.array(scores)).tolist()
  75. if isinstance(scores, float):
  76. res.append(scores)
  77. else:
  78. res.extend(scores)
  79. return np.array(res), token_count
  80. class JinaRerank(Base):
  81. def __init__(self, key, model_name="jina-reranker-v2-base-multilingual",
  82. base_url="https://api.jina.ai/v1/rerank"):
  83. self.base_url = "https://api.jina.ai/v1/rerank"
  84. self.headers = {
  85. "Content-Type": "application/json",
  86. "Authorization": f"Bearer {key}"
  87. }
  88. self.model_name = model_name
  89. def similarity(self, query: str, texts: list):
  90. texts = [truncate(t, 8196) for t in texts]
  91. data = {
  92. "model": self.model_name,
  93. "query": query,
  94. "documents": texts,
  95. "top_n": len(texts)
  96. }
  97. res = requests.post(self.base_url, headers=self.headers, json=data).json()
  98. rank = np.zeros(len(texts), dtype=float)
  99. for d in res["results"]:
  100. rank[d["index"]] = d["relevance_score"]
  101. return rank, res["usage"]["total_tokens"]
  102. class YoudaoRerank(DefaultRerank):
  103. _model = None
  104. _model_lock = threading.Lock()
  105. def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
  106. if not settings.LIGHTEN and not YoudaoRerank._model:
  107. from BCEmbedding import RerankerModel
  108. with YoudaoRerank._model_lock:
  109. if not YoudaoRerank._model:
  110. try:
  111. YoudaoRerank._model = RerankerModel(model_name_or_path=os.path.join(
  112. get_home_cache_dir(),
  113. re.sub(r"^[a-zA-Z0-9]+/", "", model_name)))
  114. except Exception:
  115. YoudaoRerank._model = RerankerModel(
  116. model_name_or_path=model_name.replace(
  117. "maidalun1020", "InfiniFlow"))
  118. self._model = YoudaoRerank._model
  119. def similarity(self, query: str, texts: list):
  120. pairs = [(query, truncate(t, self._model.max_length)) for t in texts]
  121. token_count = 0
  122. for _, t in pairs:
  123. token_count += num_tokens_from_string(t)
  124. batch_size = 8
  125. res = []
  126. for i in range(0, len(pairs), batch_size):
  127. scores = self._model.compute_score(pairs[i:i + batch_size], max_length=self._model.max_length)
  128. scores = sigmoid(np.array(scores)).tolist()
  129. if isinstance(scores, float):
  130. res.append(scores)
  131. else:
  132. res.extend(scores)
  133. return np.array(res), token_count
  134. class XInferenceRerank(Base):
  135. def __init__(self, key="xxxxxxx", model_name="", base_url=""):
  136. if base_url.find("/v1") == -1:
  137. base_url = urljoin(base_url, "/v1/rerank")
  138. if base_url.find("/rerank") == -1:
  139. base_url = urljoin(base_url, "/v1/rerank")
  140. self.model_name = model_name
  141. self.base_url = base_url
  142. self.headers = {
  143. "Content-Type": "application/json",
  144. "accept": "application/json",
  145. "Authorization": f"Bearer {key}"
  146. }
  147. def similarity(self, query: str, texts: list):
  148. if len(texts) == 0:
  149. return np.array([]), 0
  150. pairs = [(query, truncate(t, 4096)) for t in texts]
  151. token_count = 0
  152. for _, t in pairs:
  153. token_count += num_tokens_from_string(t)
  154. data = {
  155. "model": self.model_name,
  156. "query": query,
  157. "return_documents": "true",
  158. "return_len": "true",
  159. "documents": texts
  160. }
  161. res = requests.post(self.base_url, headers=self.headers, json=data).json()
  162. rank = np.zeros(len(texts), dtype=float)
  163. for d in res["results"]:
  164. rank[d["index"]] = d["relevance_score"]
  165. return rank, token_count
  166. class LocalAIRerank(Base):
  167. def __init__(self, key, model_name, base_url):
  168. if base_url.find("/rerank") == -1:
  169. self.base_url = urljoin(base_url, "/rerank")
  170. else:
  171. self.base_url = base_url
  172. self.headers = {
  173. "Content-Type": "application/json",
  174. "Authorization": f"Bearer {key}"
  175. }
  176. self.model_name = model_name.split("___")[0]
  177. def similarity(self, query: str, texts: list):
  178. # noway to config Ragflow , use fix setting
  179. texts = [truncate(t, 500) for t in texts]
  180. data = {
  181. "model": self.model_name,
  182. "query": query,
  183. "documents": texts,
  184. "top_n": len(texts),
  185. }
  186. token_count = 0
  187. for t in texts:
  188. token_count += num_tokens_from_string(t)
  189. res = requests.post(self.base_url, headers=self.headers, json=data).json()
  190. rank = np.zeros(len(texts), dtype=float)
  191. if 'results' not in res:
  192. raise ValueError("response not contains results\n" + str(res))
  193. for d in res["results"]:
  194. rank[d["index"]] = d["relevance_score"]
  195. # Normalize the rank values to the range 0 to 1
  196. min_rank = np.min(rank)
  197. max_rank = np.max(rank)
  198. # Avoid division by zero if all ranks are identical
  199. if max_rank - min_rank != 0:
  200. rank = (rank - min_rank) / (max_rank - min_rank)
  201. else:
  202. rank = np.zeros_like(rank)
  203. return rank, token_count
  204. class NvidiaRerank(Base):
  205. def __init__(
  206. self, key, model_name, base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/"
  207. ):
  208. if not base_url:
  209. base_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/"
  210. self.model_name = model_name
  211. if self.model_name == "nvidia/nv-rerankqa-mistral-4b-v3":
  212. self.base_url = os.path.join(
  213. base_url, "nv-rerankqa-mistral-4b-v3", "reranking"
  214. )
  215. if self.model_name == "nvidia/rerank-qa-mistral-4b":
  216. self.base_url = os.path.join(base_url, "reranking")
  217. self.model_name = "nv-rerank-qa-mistral-4b:1"
  218. self.headers = {
  219. "accept": "application/json",
  220. "Content-Type": "application/json",
  221. "Authorization": f"Bearer {key}",
  222. }
  223. def similarity(self, query: str, texts: list):
  224. token_count = num_tokens_from_string(query) + sum(
  225. [num_tokens_from_string(t) for t in texts]
  226. )
  227. data = {
  228. "model": self.model_name,
  229. "query": {"text": query},
  230. "passages": [{"text": text} for text in texts],
  231. "truncate": "END",
  232. "top_n": len(texts),
  233. }
  234. res = requests.post(self.base_url, headers=self.headers, json=data).json()
  235. rank = np.zeros(len(texts), dtype=float)
  236. for d in res["rankings"]:
  237. rank[d["index"]] = d["logit"]
  238. return rank, token_count
  239. class LmStudioRerank(Base):
  240. def __init__(self, key, model_name, base_url):
  241. pass
  242. def similarity(self, query: str, texts: list):
  243. raise NotImplementedError("The LmStudioRerank has not been implement")
  244. class OpenAI_APIRerank(Base):
  245. def __init__(self, key, model_name, base_url):
  246. if base_url.find("/rerank") == -1:
  247. self.base_url = urljoin(base_url, "/rerank")
  248. else:
  249. self.base_url = base_url
  250. self.headers = {
  251. "Content-Type": "application/json",
  252. "Authorization": f"Bearer {key}"
  253. }
  254. self.model_name = model_name.split("___")[0]
  255. def similarity(self, query: str, texts: list):
  256. # noway to config Ragflow , use fix setting
  257. texts = [truncate(t, 500) for t in texts]
  258. data = {
  259. "model": self.model_name,
  260. "query": query,
  261. "documents": texts,
  262. "top_n": len(texts),
  263. }
  264. token_count = 0
  265. for t in texts:
  266. token_count += num_tokens_from_string(t)
  267. res = requests.post(self.base_url, headers=self.headers, json=data).json()
  268. rank = np.zeros(len(texts), dtype=float)
  269. if 'results' not in res:
  270. raise ValueError("response not contains results\n" + str(res))
  271. for d in res["results"]:
  272. rank[d["index"]] = d["relevance_score"]
  273. # Normalize the rank values to the range 0 to 1
  274. min_rank = np.min(rank)
  275. max_rank = np.max(rank)
  276. # Avoid division by zero if all ranks are identical
  277. if max_rank - min_rank != 0:
  278. rank = (rank - min_rank) / (max_rank - min_rank)
  279. else:
  280. rank = np.zeros_like(rank)
  281. return rank, token_count
  282. class CoHereRerank(Base):
  283. def __init__(self, key, model_name, base_url=None):
  284. from cohere import Client
  285. self.client = Client(api_key=key)
  286. self.model_name = model_name
  287. def similarity(self, query: str, texts: list):
  288. token_count = num_tokens_from_string(query) + sum(
  289. [num_tokens_from_string(t) for t in texts]
  290. )
  291. res = self.client.rerank(
  292. model=self.model_name,
  293. query=query,
  294. documents=texts,
  295. top_n=len(texts),
  296. return_documents=False,
  297. )
  298. rank = np.zeros(len(texts), dtype=float)
  299. for d in res.results:
  300. rank[d.index] = d.relevance_score
  301. return rank, token_count
  302. class TogetherAIRerank(Base):
  303. def __init__(self, key, model_name, base_url):
  304. pass
  305. def similarity(self, query: str, texts: list):
  306. raise NotImplementedError("The api has not been implement")
  307. class SILICONFLOWRerank(Base):
  308. def __init__(
  309. self, key, model_name, base_url="https://api.siliconflow.cn/v1/rerank"
  310. ):
  311. if not base_url:
  312. base_url = "https://api.siliconflow.cn/v1/rerank"
  313. self.model_name = model_name
  314. self.base_url = base_url
  315. self.headers = {
  316. "accept": "application/json",
  317. "content-type": "application/json",
  318. "authorization": f"Bearer {key}",
  319. }
  320. def similarity(self, query: str, texts: list):
  321. payload = {
  322. "model": self.model_name,
  323. "query": query,
  324. "documents": texts,
  325. "top_n": len(texts),
  326. "return_documents": False,
  327. "max_chunks_per_doc": 1024,
  328. "overlap_tokens": 80,
  329. }
  330. response = requests.post(
  331. self.base_url, json=payload, headers=self.headers
  332. ).json()
  333. rank = np.zeros(len(texts), dtype=float)
  334. if "results" not in response:
  335. return rank, 0
  336. for d in response["results"]:
  337. rank[d["index"]] = d["relevance_score"]
  338. return (
  339. rank,
  340. response["meta"]["tokens"]["input_tokens"] + response["meta"]["tokens"]["output_tokens"],
  341. )
  342. class BaiduYiyanRerank(Base):
  343. def __init__(self, key, model_name, base_url=None):
  344. from qianfan.resources import Reranker
  345. key = json.loads(key)
  346. ak = key.get("yiyan_ak", "")
  347. sk = key.get("yiyan_sk", "")
  348. self.client = Reranker(ak=ak, sk=sk)
  349. self.model_name = model_name
  350. def similarity(self, query: str, texts: list):
  351. res = self.client.do(
  352. model=self.model_name,
  353. query=query,
  354. documents=texts,
  355. top_n=len(texts),
  356. ).body
  357. rank = np.zeros(len(texts), dtype=float)
  358. for d in res["results"]:
  359. rank[d["index"]] = d["relevance_score"]
  360. return rank, res["usage"]["total_tokens"]
  361. class VoyageRerank(Base):
  362. def __init__(self, key, model_name, base_url=None):
  363. import voyageai
  364. self.client = voyageai.Client(api_key=key)
  365. self.model_name = model_name
  366. def similarity(self, query: str, texts: list):
  367. rank = np.zeros(len(texts), dtype=float)
  368. if not texts:
  369. return rank, 0
  370. res = self.client.rerank(
  371. query=query, documents=texts, model=self.model_name, top_k=len(texts)
  372. )
  373. for r in res.results:
  374. rank[r.index] = r.relevance_score
  375. return rank, res.total_tokens
  376. class QWenRerank(Base):
  377. def __init__(self, key, model_name='gte-rerank', base_url=None, **kwargs):
  378. import dashscope
  379. self.api_key = key
  380. self.model_name = dashscope.TextReRank.Models.gte_rerank if model_name is None else model_name
  381. def similarity(self, query: str, texts: list):
  382. import dashscope
  383. from http import HTTPStatus
  384. resp = dashscope.TextReRank.call(
  385. api_key=self.api_key,
  386. model=self.model_name,
  387. query=query,
  388. documents=texts,
  389. top_n=len(texts),
  390. return_documents=False
  391. )
  392. rank = np.zeros(len(texts), dtype=float)
  393. if resp.status_code == HTTPStatus.OK:
  394. for r in resp.output.results:
  395. rank[r.index] = r.relevance_score
  396. return rank, resp.usage.total_tokens
  397. else:
  398. raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}")
  399. class GPUStackRerank(Base):
  400. def __init__(
  401. self, key, model_name, base_url
  402. ):
  403. if not base_url:
  404. raise ValueError("url cannot be None")
  405. self.model_name = model_name
  406. self.base_url = str(URL(base_url)/ "v1" / "rerank")
  407. self.headers = {
  408. "accept": "application/json",
  409. "content-type": "application/json",
  410. "authorization": f"Bearer {key}",
  411. }
  412. def similarity(self, query: str, texts: list):
  413. payload = {
  414. "model": self.model_name,
  415. "query": query,
  416. "documents": texts,
  417. "top_n": len(texts),
  418. }
  419. try:
  420. response = requests.post(
  421. self.base_url, json=payload, headers=self.headers
  422. )
  423. response.raise_for_status()
  424. response_json = response.json()
  425. rank = np.zeros(len(texts), dtype=float)
  426. if "results" not in response_json:
  427. return rank, 0
  428. token_count = 0
  429. for t in texts:
  430. token_count += num_tokens_from_string(t)
  431. for result in response_json["results"]:
  432. rank[result["index"]] = result["relevance_score"]
  433. return (
  434. rank,
  435. token_count,
  436. )
  437. except httpx.HTTPStatusError as e:
  438. raise ValueError(f"Error calling GPUStackRerank model {self.model_name}: {e.response.status_code} - {e.response.text}")