Co-authored-by: jyong <jyong@dify.ai>
tags/0.5.11
| @@ -144,6 +144,16 @@ class MilvusVector(BaseVector): | |||
| utility.drop_collection(self._collection_name, None, using=alias) | |||
| def text_exists(self, id: str) -> bool: | |||
| alias = uuid4().hex | |||
| if self._client_config.secure: | |||
| uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port) | |||
| else: | |||
| uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port) | |||
| connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password) | |||
| from pymilvus import utility | |||
| if not utility.has_collection(self._collection_name, using=alias): | |||
| return False | |||
| result = self._client.query(collection_name=self._collection_name, | |||
| filter=f'metadata["doc_id"] == "{id}"', | |||
| @@ -275,6 +275,13 @@ class QdrantVector(BaseVector): | |||
| ) | |||
| def text_exists(self, id: str) -> bool: | |||
| all_collection_name = [] | |||
| collections_response = self._client.get_collections() | |||
| collection_list = collections_response.collections | |||
| for collection in collection_list: | |||
| all_collection_name.append(collection.name) | |||
| if self._collection_name not in all_collection_name: | |||
| return False | |||
| response = self._client.retrieve( | |||
| collection_name=self._collection_name, | |||
| ids=[id] | |||
| @@ -128,8 +128,8 @@ class Vector: | |||
| if kwargs.get('duplicate_check', False): | |||
| documents = self._filter_duplicate_texts(documents) | |||
| embeddings = self._embeddings.embed_documents([document.page_content for document in documents]) | |||
| self._vector_processor.add_texts( | |||
| documents=documents, | |||
| self._vector_processor.create( | |||
| texts=documents, | |||
| embeddings=embeddings, | |||
| **kwargs | |||
| ) | |||
| @@ -134,6 +134,11 @@ class WeaviateVector(BaseVector): | |||
| def text_exists(self, id: str) -> bool: | |||
| collection_name = self._collection_name | |||
| schema = self._default_schema(self._collection_name) | |||
| # check whether the index already exists | |||
| if not self._client.schema.contains(schema): | |||
| return False | |||
| result = self._client.query.get(collection_name).with_additional(["id"]).with_where({ | |||
| "path": ["doc_id"], | |||
| "operator": "Equal", | |||