Browse Source

Fix/vdb lindorm (#16660)

Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com>
tags/1.2.0
Jiang 7 months ago
parent
commit
fc8c765215
No account linked to committer's email address
1 changed file with 6 additions and 8 deletions
  1. api/core/rag/datasource/vdb/lindorm/lindorm_vector.py (6 additions, 8 deletions)

+ 6
- 8
api/core/rag/datasource/vdb/lindorm/lindorm_vector.py View File

if response["errors"]: if response["errors"]:
for item in response["items"]: for item in response["items"]:
print(f"{item['index']['status']}: {item['index']['error']['type']}") print(f"{item['index']['status']}: {item['index']['error']['type']}")
else:
self.refresh()


def get_ids_by_metadata_field(self, key: str, value: str): def get_ids_by_metadata_field(self, key: str, value: str):
query: dict[str, Any] = { query: dict[str, Any] = {
if not all(isinstance(x, float) for x in query_vector): if not all(isinstance(x, float) for x in query_vector):
raise ValueError("All elements in query_vector should be floats") raise ValueError("All elements in query_vector should be floats")


top_k = kwargs.get("top_k", 10)
top_k = kwargs.get("top_k", 3)
document_ids_filter = kwargs.get("document_ids_filter") document_ids_filter = kwargs.get("document_ids_filter")
filters = [] filters = []
if document_ids_filter: if document_ids_filter:
must_not = kwargs.get("must_not") must_not = kwargs.get("must_not")
should = kwargs.get("should") should = kwargs.get("should")
minimum_should_match = kwargs.get("minimum_should_match", 0) minimum_should_match = kwargs.get("minimum_should_match", 0)
top_k = kwargs.get("top_k", 10)
top_k = kwargs.get("top_k", 3)
filters = kwargs.get("filter", []) filters = kwargs.get("filter", [])
document_ids_filter = kwargs.get("document_ids_filter") document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter: if document_ids_filter:




def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict: def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict:
excludes_from_source = kwargs.get("excludes_from_source")
excludes_from_source = kwargs.get("excludes_from_source", False)
analyzer = kwargs.get("analyzer", "ik_max_word") analyzer = kwargs.get("analyzer", "ik_max_word")
text_field = kwargs.get("text_field", Field.CONTENT_KEY.value) text_field = kwargs.get("text_field", Field.CONTENT_KEY.value)
engine = kwargs["engine"] engine = kwargs["engine"]


if excludes_from_source: if excludes_from_source:
# e.g. {"excludes": ["vector_field"]} # e.g. {"excludes": ["vector_field"]}
mapping["mappings"]["_source"] = {"excludes": excludes_from_source}
mapping["mappings"]["_source"] = {"excludes": [vector_field]}


if using_ugc and method_name == "ivfpq": if using_ugc and method_name == "ivfpq":
mapping["settings"]["index"]["knn_routing"] = True mapping["settings"]["index"]["knn_routing"] = True
mapping["settings"]["index"]["knn.offline.construction"] = True mapping["settings"]["index"]["knn.offline.construction"] = True
elif using_ugc and method_name == "hnsw" or using_ugc and method_name == "flat":
elif (using_ugc and method_name == "hnsw") or (using_ugc and method_name == "flat"):
mapping["settings"]["index"]["knn_routing"] = True mapping["settings"]["index"]["knn_routing"] = True
return mapping return mapping


"query": {"knn": {vector_field: {"vector": query_vector, "k": k}}}, "query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
} }


if filters is not None:
if filters is not None and len(filters) > 0:
# when using filter, transform filter from List[Dict] to Dict as valid format # when using filter, transform filter from List[Dict] to Dict as valid format
filter_dict = {"bool": {"must": filters}} if len(filters) > 1 else filters[0] filter_dict = {"bool": {"must": filters}} if len(filters) > 1 else filters[0]
search_query["query"]["knn"][vector_field]["filter"] = filter_dict # filter should be Dict search_query["query"]["knn"][vector_field]["filter"] = filter_dict # filter should be Dict

Loading…
Cancel
Save