浏览代码

FEAT: Tencent Vector optimize BM25 initialization to reduce loading time (#24915)

Co-authored-by: wlleiiwang <wlleiiwang@tencent.com>
tags/1.8.1
wlleiiwang 2 个月前
父节点
当前提交
9486715929
没有帐户链接到提交者的电子邮件
共有 1 个文件被更改，包括 5 次插入和 3 次删除
  1. 5
    3
      api/core/rag/datasource/vdb/tencent/tencent_vector.py

+ 5
- 3
api/core/rag/datasource/vdb/tencent/tencent_vector.py 查看文件

return {"url": self.url, "username": self.username, "key": self.api_key, "timeout": self.timeout} return {"url": self.url, "username": self.username, "key": self.api_key, "timeout": self.timeout}




bm25 = BM25Encoder.default("zh")


class TencentVector(BaseVector): class TencentVector(BaseVector):
field_id: str = "id" field_id: str = "id"
field_vector: str = "vector" field_vector: str = "vector"
self._dimension = 1024 self._dimension = 1024
self._init_database() self._init_database()
self._load_collection() self._load_collection()
self._bm25 = BM25Encoder.default("zh")


def _load_collection(self): def _load_collection(self):
""" """
metadata=metadata, metadata=metadata,
) )
if self._enable_hybrid_search: if self._enable_hybrid_search:
doc.__dict__["sparse_vector"] = self._bm25.encode_texts(texts[i])
doc.__dict__["sparse_vector"] = bm25.encode_texts(texts[i])
docs.append(doc) docs.append(doc)
self._client.upsert( self._client.upsert(
database_name=self._client_config.database, database_name=self._client_config.database,
match=[ match=[
KeywordSearch( KeywordSearch(
field_name="sparse_vector", field_name="sparse_vector",
data=self._bm25.encode_queries(query),
data=bm25.encode_queries(query),
), ),
], ],
rerank=WeightedRerank( rerank=WeightedRerank(

正在加载...
取消
保存