Quellcode durchsuchen

fix kb permission (#15199)

Signed-off-by: kenwoodjw <blackxin55@gmail.com>
Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
tags/1.0.1
kenwoodjw vor 7 Monaten
Ursprung
Commit
adda049265
Es ist kein Account mit der E-Mail-Adresse des Committers verbunden
2 geänderte Dateien mit 72 neuen und 24 gelöschten Zeilen
  1. api/controllers/console/datasets/datasets.py (+5, -1)
  2. api/services/dataset_service.py (+67, -23)

+ 5
- 1
api/controllers/console/datasets/datasets.py Datei anzeigen

@@ -283,7 +283,11 @@ class DatasetApi(Resource):
data = request.get_json()

# check embedding model setting
if data.get("indexing_technique") == "high_quality":
if (
data.get("indexing_technique") == "high_quality"
and data.get("embedding_model_provider") is not None
and data.get("embedding_model") is not None
):
DatasetService.check_embedding_model_setting(
dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
)

+ 67
- 23
api/services/dataset_service.py Datei anzeigen

@@ -245,7 +245,7 @@ class DatasetService:
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(f"The dataset in unavailable, due to: {ex.description}")
raise ValueError(ex.description)

@staticmethod
def update_dataset(dataset_id, data, user):
@@ -327,31 +327,75 @@ class DatasetService:
raise ValueError(ex.description)
else:
# add default plugin id to both setting sets, to make sure the plugin model provider is consistent
plugin_model_provider = dataset.embedding_model_provider
plugin_model_provider = str(ModelProviderID(plugin_model_provider))

new_plugin_model_provider = data["embedding_model_provider"]
new_plugin_model_provider = str(ModelProviderID(new_plugin_model_provider))

# Skip embedding model checks if not provided in the update request
if (
new_plugin_model_provider != plugin_model_provider
or data["embedding_model"] != dataset.embedding_model
"embedding_model_provider" not in data
or "embedding_model" not in data
or not data.get("embedding_model_provider")
or not data.get("embedding_model")
):
action = "update"
# If the dataset already has embedding model settings, use those
if dataset.embedding_model_provider and dataset.embedding_model:
# Keep existing values
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
# If collection_binding_id exists, keep it too
if dataset.collection_binding_id:
filtered_data["collection_binding_id"] = dataset.collection_binding_id
# Otherwise, don't try to update embedding model settings at all
# Remove these fields from filtered_data if they exist but are None/empty
if "embedding_model_provider" in filtered_data and not filtered_data["embedding_model_provider"]:
del filtered_data["embedding_model_provider"]
if "embedding_model" in filtered_data and not filtered_data["embedding_model"]:
del filtered_data["embedding_model"]
else:
skip_embedding_update = False
try:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
# Handle existing model provider
plugin_model_provider = dataset.embedding_model_provider
plugin_model_provider_str = None
if plugin_model_provider:
plugin_model_provider_str = str(ModelProviderID(plugin_model_provider))

# Handle new model provider from request
new_plugin_model_provider = data["embedding_model_provider"]
new_plugin_model_provider_str = None
if new_plugin_model_provider:
new_plugin_model_provider_str = str(ModelProviderID(new_plugin_model_provider))

# Only update embedding model if both values are provided and different from current
if (
plugin_model_provider_str != new_plugin_model_provider_str
or data["embedding_model"] != dataset.embedding_model
):
action = "update"
model_manager = ModelManager()
try:
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
except ProviderTokenNotInitError:
# If we can't get the embedding model, skip updating it
# and keep the existing settings if available
if dataset.embedding_model_provider and dataset.embedding_model:
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
if dataset.collection_binding_id:
filtered_data["collection_binding_id"] = dataset.collection_binding_id
# Skip the rest of the embedding model update
skip_embedding_update = True
if not skip_embedding_update:
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = (
DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "

Laden…
Abbrechen
Speichern