Bladeren bron

generalize the generation of new collection name by dataset id (#2620)

tags/0.5.8
Bowen Liang 1 jaar geleden
bovenliggende
commit
801d135390
No account linked to committer's email address

+ 3
- 3
api/commands.py Bestand weergeven

continue continue
if vector_type == "weaviate": if vector_type == "weaviate":
dataset_id = dataset.id dataset_id = dataset.id
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = { index_struct_dict = {
"type": 'weaviate', "type": 'weaviate',
"vector_store": {"class_prefix": collection_name} "vector_store": {"class_prefix": collection_name}
raise ValueError('Dataset Collection Bindings is not exist!') raise ValueError('Dataset Collection Bindings is not exist!')
else: else:
dataset_id = dataset.id dataset_id = dataset.id
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = { index_struct_dict = {
"type": 'qdrant', "type": 'qdrant',
"vector_store": {"class_prefix": collection_name} "vector_store": {"class_prefix": collection_name}


elif vector_type == "milvus": elif vector_type == "milvus":
dataset_id = dataset.id dataset_id = dataset.id
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = { index_struct_dict = {
"type": 'milvus', "type": 'milvus',
"vector_store": {"class_prefix": collection_name} "vector_store": {"class_prefix": collection_name}

+ 3
- 3
api/core/rag/datasource/vdb/vector_factory.py Bestand weergeven

collection_name = class_prefix collection_name = class_prefix
else: else:
dataset_id = self._dataset.id dataset_id = self._dataset.id
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = { index_struct_dict = {
"type": 'weaviate', "type": 'weaviate',
"vector_store": {"class_prefix": collection_name} "vector_store": {"class_prefix": collection_name}
collection_name = class_prefix collection_name = class_prefix
else: else:
dataset_id = self._dataset.id dataset_id = self._dataset.id
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
collection_name = Dataset.gen_collection_name_by_id(dataset_id)


if not self._dataset.index_struct_dict: if not self._dataset.index_struct_dict:
index_struct_dict = { index_struct_dict = {
collection_name = class_prefix collection_name = class_prefix
else: else:
dataset_id = self._dataset.id dataset_id = self._dataset.id
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = { index_struct_dict = {
"type": 'milvus', "type": 'milvus',
"vector_store": {"class_prefix": collection_name} "vector_store": {"class_prefix": collection_name}

+ 1
- 1
api/core/rag/datasource/vdb/weaviate/weaviate_vector.py Bestand weergeven

return class_prefix return class_prefix


dataset_id = dataset.id dataset_id = dataset.id
return "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
return Dataset.gen_collection_name_by_id(dataset_id)


def to_index_struct(self) -> dict: def to_index_struct(self) -> dict:
return { return {

+ 4
- 0
api/models/dataset.py Bestand weergeven

} }
return self.retrieval_model if self.retrieval_model else default_retrieval_model return self.retrieval_model if self.retrieval_model else default_retrieval_model


@staticmethod
def gen_collection_name_by_id(dataset_id: str) -> str:
    """Build the vector-store collection name for a dataset id.

    Every hyphen in ``dataset_id`` is replaced with an underscore and the
    result is wrapped as ``Vector_index_<normalized id>_Node``.

    :param dataset_id: dataset identifier as a string (for example the
        string form of a UUID).
    :return: the derived collection name.
    """
    normalized_dataset_id = dataset_id.replace("-", "_")
    return f'Vector_index_{normalized_dataset_id}_Node'


class DatasetProcessRule(db.Model): class DatasetProcessRule(db.Model):
__tablename__ = 'dataset_process_rules' __tablename__ = 'dataset_process_rules'

+ 1
- 1
api/services/dataset_service.py Bestand weergeven

dataset_collection_binding = DatasetCollectionBinding( dataset_collection_binding = DatasetCollectionBinding(
provider_name=provider_name, provider_name=provider_name,
model_name=model_name, model_name=model_name,
collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node',
collection_name=Dataset.gen_collection_name_by_id(str(uuid.uuid4())),
type=collection_type type=collection_type
) )
db.session.add(dataset_collection_binding) db.session.add(dataset_collection_binding)

Laden…
Annuleren
Opslaan