瀏覽代碼

fix weaviate hybrid search issue (#1600)

Co-authored-by: jyong <jyong@dify.ai>
tags/0.3.31-fix3
Jyong 1 年之前
父節點
當前提交
b930716745
沒有連結到貢獻者的電子郵件帳戶。

+ 1
- 1
api/core/index/vector_index/weaviate_vector_index.py 查看文件

if self._vector_store: if self._vector_store:
return self._vector_store return self._vector_store


attributes = ['doc_id', 'dataset_id', 'document_id']
attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
if self._is_origin(): if self._is_origin():
attributes = ['doc_id'] attributes = ['doc_id']



+ 6
- 5
api/core/vector_store/vector/weaviate.py 查看文件





def _default_score_normalizer(val: float) -> float: def _default_score_normalizer(val: float) -> float:
return 1 - 1 / (1 + np.exp(val))
return 1 - val




def _json_serializable(value: Any) -> Any: def _json_serializable(value: Any) -> Any:
query_obj = query_obj.with_where(kwargs.get("where_filter")) query_obj = query_obj.with_where(kwargs.get("where_filter"))
if kwargs.get("additional"): if kwargs.get("additional"):
query_obj = query_obj.with_additional(kwargs.get("additional")) query_obj = query_obj.with_additional(kwargs.get("additional"))
result = query_obj.with_bm25(query=content).with_limit(k).do()
properties = ['text', 'dataset_id', 'doc_hash', 'doc_id', 'document_id']
result = query_obj.with_bm25(query=query, properties=properties).with_limit(k).do()
if "errors" in result: if "errors" in result:
raise ValueError(f"Error during query: {result['errors']}") raise ValueError(f"Error during query: {result['errors']}")
docs = [] docs = []
result = ( result = (
query_obj.with_near_vector(vector) query_obj.with_near_vector(vector)
.with_limit(k) .with_limit(k)
.with_additional("vector")
.with_additional(["vector", "distance"])
.do() .do()
) )
else: else:
result = ( result = (
query_obj.with_near_text(content) query_obj.with_near_text(content)
.with_limit(k) .with_limit(k)
.with_additional("vector")
.with_additional(["vector", "distance"])
.do() .do()
) )


docs_and_scores = [] docs_and_scores = []
for res in result["data"]["Get"][self._index_name]: for res in result["data"]["Get"][self._index_name]:
text = res.pop(self._text_key) text = res.pop(self._text_key)
score = np.dot(res["_additional"]["vector"], embedded_query)
score = res["_additional"]["distance"]
docs_and_scores.append((Document(page_content=text, metadata=res), score)) docs_and_scores.append((Document(page_content=text, metadata=res), score))
return docs_and_scores return docs_and_scores



+ 1
- 1
api/core/vector_store/weaviate_vector_store.py 查看文件

from langchain.vectorstores import Weaviate
from core.vector_store.vector.weaviate import Weaviate




class WeaviateVectorStore(Weaviate): class WeaviateVectorStore(Weaviate):

+ 2
- 2
docker/docker-compose.middleware.yaml 查看文件



# The Weaviate vector store. # The Weaviate vector store.
weaviate: weaviate:
image: semitechnologies/weaviate:1.18.4
image: semitechnologies/weaviate:1.19.0
restart: always restart: always
volumes: volumes:
# Mount the Weaviate data directory to the container. # Mount the Weaviate data directory to the container.
# environment: # environment:
# QDRANT__API_KEY: 'difyai123456' # QDRANT__API_KEY: 'difyai123456'
# ports: # ports:
# - "6333:6333"
# - "6333:6333"

+ 1
- 1
docker/docker-compose.yaml 查看文件



# The Weaviate vector store. # The Weaviate vector store.
weaviate: weaviate:
image: semitechnologies/weaviate:1.18.4
image: semitechnologies/weaviate:1.19.0
restart: always restart: always
volumes: volumes:
# Mount the Weaviate data directory to the container. # Mount the Weaviate data directory to the container.

Loading…
取消
儲存