|
|
|
@@ -1,20 +1,21 @@ |
|
|
|
import base64 |
|
|
|
import json |
|
|
|
import secrets |
|
|
|
from typing import cast |
|
|
|
|
|
|
|
import click |
|
|
|
from flask import current_app |
|
|
|
from werkzeug.exceptions import NotFound |
|
|
|
|
|
|
|
from core.embedding.cached_embedding import CacheEmbedding |
|
|
|
from core.model_manager import ModelManager |
|
|
|
from core.model_runtime.entities.model_entities import ModelType |
|
|
|
from core.rag.datasource.vdb.vector_factory import Vector |
|
|
|
from core.rag.models.document import Document |
|
|
|
from extensions.ext_database import db |
|
|
|
from libs.helper import email as email_validate |
|
|
|
from libs.password import hash_password, password_pattern, valid_password |
|
|
|
from libs.rsa import generate_key_pair |
|
|
|
from models.account import Tenant |
|
|
|
from models.dataset import Dataset |
|
|
|
from models.dataset import Dataset, DatasetCollectionBinding, DocumentSegment |
|
|
|
from models.dataset import Document as DatasetDocument |
|
|
|
from models.model import Account |
|
|
|
from models.provider import Provider, ProviderModel |
|
|
|
|
|
|
|
@@ -124,14 +125,15 @@ def reset_encrypt_key_pair(): |
|
|
|
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green')) |
|
|
|
|
|
|
|
|
|
|
|
@click.command('create-qdrant-indexes', help='Create qdrant indexes.') |
|
|
|
def create_qdrant_indexes(): |
|
|
|
@click.command('vdb-migrate', help='migrate vector db.') |
|
|
|
def vdb_migrate(): |
|
|
|
""" |
|
|
|
Migrate other vector database datas to Qdrant. |
|
|
|
Migrate vector database datas to target vector database . |
|
|
|
""" |
|
|
|
click.echo(click.style('Start create qdrant indexes.', fg='green')) |
|
|
|
click.echo(click.style('Start migrate vector db.', fg='green')) |
|
|
|
create_count = 0 |
|
|
|
|
|
|
|
config = cast(dict, current_app.config) |
|
|
|
vector_type = config.get('VECTOR_STORE') |
|
|
|
page = 1 |
|
|
|
while True: |
|
|
|
try: |
|
|
|
@@ -140,54 +142,101 @@ def create_qdrant_indexes(): |
|
|
|
except NotFound: |
|
|
|
break |
|
|
|
|
|
|
|
model_manager = ModelManager() |
|
|
|
|
|
|
|
page += 1 |
|
|
|
for dataset in datasets: |
|
|
|
if dataset.index_struct_dict: |
|
|
|
if dataset.index_struct_dict['type'] != 'qdrant': |
|
|
|
try: |
|
|
|
click.echo('Create dataset qdrant index: {}'.format(dataset.id)) |
|
|
|
try: |
|
|
|
embedding_model = model_manager.get_model_instance( |
|
|
|
tenant_id=dataset.tenant_id, |
|
|
|
provider=dataset.embedding_model_provider, |
|
|
|
model_type=ModelType.TEXT_EMBEDDING, |
|
|
|
model=dataset.embedding_model |
|
|
|
|
|
|
|
) |
|
|
|
except Exception: |
|
|
|
continue |
|
|
|
embeddings = CacheEmbedding(embedding_model) |
|
|
|
|
|
|
|
from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex |
|
|
|
|
|
|
|
index = QdrantVectorIndex( |
|
|
|
dataset=dataset, |
|
|
|
config=QdrantConfig( |
|
|
|
endpoint=current_app.config.get('QDRANT_URL'), |
|
|
|
api_key=current_app.config.get('QDRANT_API_KEY'), |
|
|
|
root_path=current_app.root_path |
|
|
|
), |
|
|
|
embeddings=embeddings |
|
|
|
) |
|
|
|
if index: |
|
|
|
index.create_qdrant_dataset(dataset) |
|
|
|
index_struct = { |
|
|
|
"type": 'qdrant', |
|
|
|
"vector_store": { |
|
|
|
"class_prefix": dataset.index_struct_dict['vector_store']['class_prefix']} |
|
|
|
} |
|
|
|
dataset.index_struct = json.dumps(index_struct) |
|
|
|
db.session.commit() |
|
|
|
create_count += 1 |
|
|
|
try: |
|
|
|
click.echo('Create dataset vdb index: {}'.format(dataset.id)) |
|
|
|
if dataset.index_struct_dict: |
|
|
|
if dataset.index_struct_dict['type'] == vector_type: |
|
|
|
continue |
|
|
|
if vector_type == "weaviate": |
|
|
|
dataset_id = dataset.id |
|
|
|
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' |
|
|
|
index_struct_dict = { |
|
|
|
"type": 'weaviate', |
|
|
|
"vector_store": {"class_prefix": collection_name} |
|
|
|
} |
|
|
|
dataset.index_struct = json.dumps(index_struct_dict) |
|
|
|
elif vector_type == "qdrant": |
|
|
|
if dataset.collection_binding_id: |
|
|
|
dataset_collection_binding = db.session.query(DatasetCollectionBinding). \ |
|
|
|
filter(DatasetCollectionBinding.id == dataset.collection_binding_id). \ |
|
|
|
one_or_none() |
|
|
|
if dataset_collection_binding: |
|
|
|
collection_name = dataset_collection_binding.collection_name |
|
|
|
else: |
|
|
|
click.echo('passed.') |
|
|
|
raise ValueError('Dataset Collection Bindings is not exist!') |
|
|
|
else: |
|
|
|
dataset_id = dataset.id |
|
|
|
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' |
|
|
|
index_struct_dict = { |
|
|
|
"type": 'qdrant', |
|
|
|
"vector_store": {"class_prefix": collection_name} |
|
|
|
} |
|
|
|
dataset.index_struct = json.dumps(index_struct_dict) |
|
|
|
|
|
|
|
elif vector_type == "milvus": |
|
|
|
dataset_id = dataset.id |
|
|
|
collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' |
|
|
|
index_struct_dict = { |
|
|
|
"type": 'milvus', |
|
|
|
"vector_store": {"class_prefix": collection_name} |
|
|
|
} |
|
|
|
dataset.index_struct = json.dumps(index_struct_dict) |
|
|
|
else: |
|
|
|
raise ValueError(f"Vector store {config.get('VECTOR_STORE')} is not supported.") |
|
|
|
|
|
|
|
vector = Vector(dataset) |
|
|
|
click.echo(f"vdb_migrate {dataset.id}") |
|
|
|
|
|
|
|
try: |
|
|
|
vector.delete() |
|
|
|
except Exception as e: |
|
|
|
raise e |
|
|
|
|
|
|
|
dataset_documents = db.session.query(DatasetDocument).filter( |
|
|
|
DatasetDocument.dataset_id == dataset.id, |
|
|
|
DatasetDocument.indexing_status == 'completed', |
|
|
|
DatasetDocument.enabled == True, |
|
|
|
DatasetDocument.archived == False, |
|
|
|
).all() |
|
|
|
|
|
|
|
documents = [] |
|
|
|
for dataset_document in dataset_documents: |
|
|
|
segments = db.session.query(DocumentSegment).filter( |
|
|
|
DocumentSegment.document_id == dataset_document.id, |
|
|
|
DocumentSegment.status == 'completed', |
|
|
|
DocumentSegment.enabled == True |
|
|
|
).all() |
|
|
|
|
|
|
|
for segment in segments: |
|
|
|
document = Document( |
|
|
|
page_content=segment.content, |
|
|
|
metadata={ |
|
|
|
"doc_id": segment.index_node_id, |
|
|
|
"doc_hash": segment.index_node_hash, |
|
|
|
"document_id": segment.document_id, |
|
|
|
"dataset_id": segment.dataset_id, |
|
|
|
} |
|
|
|
) |
|
|
|
|
|
|
|
documents.append(document) |
|
|
|
|
|
|
|
if documents: |
|
|
|
try: |
|
|
|
vector.create(documents) |
|
|
|
except Exception as e: |
|
|
|
click.echo( |
|
|
|
click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)), |
|
|
|
fg='red')) |
|
|
|
continue |
|
|
|
raise e |
|
|
|
click.echo(f"Dataset {dataset.id} create successfully.") |
|
|
|
db.session.add(dataset) |
|
|
|
db.session.commit() |
|
|
|
create_count += 1 |
|
|
|
except Exception as e: |
|
|
|
db.session.rollback() |
|
|
|
click.echo( |
|
|
|
click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)), |
|
|
|
fg='red')) |
|
|
|
continue |
|
|
|
|
|
|
|
click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(create_count), fg='green')) |
|
|
|
|
|
|
|
@@ -196,4 +245,4 @@ def register_commands(app): |
|
|
|
app.cli.add_command(reset_password) |
|
|
|
app.cli.add_command(reset_email) |
|
|
|
app.cli.add_command(reset_encrypt_key_pair) |
|
|
|
app.cli.add_command(create_qdrant_indexes) |
|
|
|
app.cli.add_command(vdb_migrate) |