Procházet zdrojové kódy

Fix adding a segment when the dataset and document are empty (#3021)

Co-authored-by: jyong <jyong@dify.ai>
tags/0.5.11
Jyong před 1 rokem
rodič
revize
a6cd0f0e73
Žádný účet není propojen s e-mailovou adresou tvůrce revize

+ 10
- 0
api/core/rag/datasource/vdb/milvus/milvus_vector.py Zobrazit soubor

@@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
utility.drop_collection(self._collection_name, None, using=alias)

def text_exists(self, id: str) -> bool:
alias = uuid4().hex
if self._client_config.secure:
uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
else:
uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)

from pymilvus import utility
if not utility.has_collection(self._collection_name, using=alias):
return False

result = self._client.query(collection_name=self._collection_name,
filter=f'metadata["doc_id"] == "{id}"',

+ 7
- 0
api/core/rag/datasource/vdb/qdrant/qdrant_vector.py Zobrazit soubor

@@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
)

def text_exists(self, id: str) -> bool:
all_collection_name = []
collections_response = self._client.get_collections()
collection_list = collections_response.collections
for collection in collection_list:
all_collection_name.append(collection.name)
if self._collection_name not in all_collection_name:
return False
response = self._client.retrieve(
collection_name=self._collection_name,
ids=[id]

+ 2
- 2
api/core/rag/datasource/vdb/vector_factory.py Zobrazit soubor

@@ -128,8 +128,8 @@ class Vector:
if kwargs.get('duplicate_check', False):
documents = self._filter_duplicate_texts(documents)
embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
- self._vector_processor.add_texts(
- documents=documents,
+ self._vector_processor.create(
+ texts=documents,
embeddings=embeddings,
**kwargs
)

+ 5
- 0
api/core/rag/datasource/vdb/weaviate/weaviate_vector.py Zobrazit soubor

@@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):

def text_exists(self, id: str) -> bool:
collection_name = self._collection_name
schema = self._default_schema(self._collection_name)

# check whether the index already exists
if not self._client.schema.contains(schema):
return False
result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
"path": ["doc_id"],
"operator": "Equal",

Načítá se…
Zrušit
Uložit