소스 검색

Feat: support huggingface re-rank model. (#5684)

### What problem does this PR solve?

#5658

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
tags/v0.17.1
Kevin Hu 8 달 전
부모
커밋
b8da2eeb69
No account linked to committer's email address
3개의 변경된 파일, 46개의 추가 그리고 7개의 삭제
  1. 1
    1
      conf/llm_factories.json
  2. 5
    0
      rag/llm/__init__.py
  3. 40
    6
      rag/llm/rerank_model.py

+ 1
- 1
conf/llm_factories.json 파일 보기

{ {
"name": "HuggingFace", "name": "HuggingFace",
"logo": "", "logo": "",
"tags": "TEXT EMBEDDING",
"tags": "TEXT EMBEDDING,TEXT RE-RANK",
"status": "1", "status": "1",
"llm": [] "llm": []
}, },

+ 5
- 0
rag/llm/__init__.py 파일 보기

YiCV, YiCV,
HunyuanCV, HunyuanCV,
) )

from .rerank_model import ( from .rerank_model import (
LocalAIRerank, LocalAIRerank,
DefaultRerank, DefaultRerank,
VoyageRerank, VoyageRerank,
QWenRerank, QWenRerank,
GPUStackRerank, GPUStackRerank,
HuggingfaceRerank,
) )

from .sequence2txt_model import ( from .sequence2txt_model import (
GPTSeq2txt, GPTSeq2txt,
QWenSeq2txt, QWenSeq2txt,
TencentCloudSeq2txt, TencentCloudSeq2txt,
GPUStackSeq2txt, GPUStackSeq2txt,
) )

from .tts_model import ( from .tts_model import (
FishAudioTTS, FishAudioTTS,
QwenTTS, QwenTTS,
"Voyage AI": VoyageRerank, "Voyage AI": VoyageRerank,
"Tongyi-Qianwen": QWenRerank, "Tongyi-Qianwen": QWenRerank,
"GPUStack": GPUStackRerank, "GPUStack": GPUStackRerank,
"HuggingFace": HuggingfaceRerank,
} }


Seq2txtModel = { Seq2txtModel = {

+ 40
- 6
rag/llm/rerank_model.py 파일 보기

import json import json





def sigmoid(x): def sigmoid(x):
return 1 / (1 + np.exp(-x)) return 1 / (1 + np.exp(-x))


local_dir_use_symlinks=False) local_dir_use_symlinks=False)
DefaultRerank._model = FlagReranker(model_dir, use_fp16=torch.cuda.is_available()) DefaultRerank._model = FlagReranker(model_dir, use_fp16=torch.cuda.is_available())
self._model = DefaultRerank._model self._model = DefaultRerank._model
self._dynamic_batch_size = 8
self._dynamic_batch_size = 8
self._min_batch_size = 1 self._min_batch_size = 1


def torch_empty_cache(self): def torch_empty_cache(self):
try: try:
import torch import torch
while retry_count < max_retries: while retry_count < max_retries:
try: try:
# call subclass implemented batch processing calculation # call subclass implemented batch processing calculation
batch_scores = self._compute_batch_scores(pairs[i:i+current_batch])
batch_scores = self._compute_batch_scores(pairs[i:i + current_batch])
res.extend(batch_scores) res.extend(batch_scores)
i += current_batch i += current_batch
self._dynamic_batch_size = min(self._dynamic_batch_size * 2, 8) self._dynamic_batch_size = min(self._dynamic_batch_size * 2, 8)


return rank, token_count return rank, token_count



class NvidiaRerank(Base): class NvidiaRerank(Base):
def __init__( def __init__(
self, key, model_name, base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/" self, key, model_name, base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/"
else: else:
raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}") raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}")



class HuggingfaceRerank(DefaultRerank):
    """Re-ranker that delegates scoring to a remote HTTP ``/rerank`` endpoint.

    The request/response shape used here (``query``/``texts`` in, a list of
    ``{"index", "score"}`` objects out) presumably targets a Hugging Face
    text-embeddings-inference server -- TODO confirm against the deployment.
    """

    @staticmethod
    def post(query: str, texts: list, url="127.0.0.1"):
        """Score ``texts`` against ``query`` via the remote rerank service.

        Args:
            query: The query string every text is compared with.
            texts: Candidate passages; they are sent in batches of 8.
            url: Service address. Either a bare ``host[:port]`` (``http://``
                is assumed) or a full URL that already carries a scheme.

        Returns:
            A numpy array with one score per entry of ``texts``, in the same
            order as ``texts``.

        Raises:
            Exception: The last error raised by any batch (network failure,
                non-2xx HTTP status, malformed response). All batches are
                attempted before the error is re-raised.
        """
        # Callers pass both "127.0.0.1" (this method's default) and
        # "http://127.0.0.1" (the constructor's default). Only prepend a
        # scheme when none is present, otherwise the request would go to
        # "http://http://...".
        endpoint = url.rstrip("/")
        if "://" not in endpoint:
            endpoint = f"http://{endpoint}"
        endpoint = f"{endpoint}/rerank"

        exc = None
        scores = [0] * len(texts)
        batch_size = 8
        for i in range(0, len(texts), batch_size):
            try:
                res = requests.post(
                    endpoint,
                    headers={"Content-Type": "application/json"},
                    json={
                        "query": query,
                        "texts": texts[i: i + batch_size],
                        "raw_scores": False,
                        "truncate": True,
                    },
                )
                # Surface HTTP errors directly instead of failing later with
                # an unrelated TypeError/KeyError while parsing an error body.
                res.raise_for_status()
                for o in res.json():
                    # "index" is relative to the submitted batch, so shift it
                    # by the batch offset to address the full score list.
                    scores[o["index"] + i] = o["score"]
            except Exception as e:
                # Remember the failure but keep going; it is re-raised below.
                exc = e

        if exc:
            raise exc
        return np.array(scores)

    def __init__(self, key, model_name="BAAI/bge-reranker-v2-m3", base_url="http://127.0.0.1"):
        """Store the connection settings for the remote rerank service.

        Args:
            key: Accepted for signature compatibility; not used here.
            model_name: Name of the rerank model; stored but not sent with
                requests by this class.
            base_url: Address of the rerank service, with or without scheme.
        """
        # NOTE(review): the parent initializer is not invoked here; it looks
        # like DefaultRerank sets up a local model, which a remote-only
        # re-ranker would not need -- confirm before adding super().__init__().
        self.model_name = model_name
        self.base_url = base_url

    def similarity(self, query: str, texts: list) -> tuple[np.ndarray, int]:
        """Return ``(scores, token_count)`` for ``texts`` ranked against ``query``.

        ``token_count`` is the sum of ``num_tokens_from_string`` over all
        texts. An empty ``texts`` yields an empty array and a count of 0
        without contacting the service.
        """
        if not texts:
            return np.array([]), 0
        token_count = sum(num_tokens_from_string(t) for t in texts)
        return HuggingfaceRerank.post(query, texts, self.base_url), token_count


class GPUStackRerank(Base): class GPUStackRerank(Base):
def __init__( def __init__(
self, key, model_name, base_url self, key, model_name, base_url
raise ValueError("url cannot be None") raise ValueError("url cannot be None")


self.model_name = model_name self.model_name = model_name
self.base_url = str(URL(base_url)/ "v1" / "rerank")
self.base_url = str(URL(base_url) / "v1" / "rerank")
self.headers = { self.headers = {
"accept": "application/json", "accept": "application/json",
"content-type": "application/json", "content-type": "application/json",
) )


except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
raise ValueError(f"Error calling GPUStackRerank model {self.model_name}: {e.response.status_code} - {e.response.text}")
raise ValueError(
f"Error calling GPUStackRerank model {self.model_name}: {e.response.status_code} - {e.response.text}")



Loading…
취소
저장