### What problem does this PR solve?

This PR introduces Pydantic-based validation for the delete dataset HTTP API, improving code clarity and robustness. Key changes include:

1. Pydantic Validation
2. Error Handling
3. Test Updates
4. Documentation Updates

### Type of change

- [x] Documentation Update
- [x] Refactoring
| @@ -20,7 +20,6 @@ import logging | |||
| from flask import request | |||
| from peewee import OperationalError | |||
| from api import settings | |||
| from api.db import FileSource, StatusEnum | |||
| from api.db.db_models import File | |||
| from api.db.services.document_service import DocumentService | |||
| @@ -30,7 +29,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService | |||
| from api.db.services.user_service import TenantService | |||
| from api.utils import get_uuid | |||
| from api.utils.api_utils import ( | |||
| check_duplicate_ids, | |||
| deep_merge, | |||
| get_error_argument_result, | |||
| get_error_data_result, | |||
| @@ -39,7 +37,7 @@ from api.utils.api_utils import ( | |||
| token_required, | |||
| verify_embedding_availability, | |||
| ) | |||
| from api.utils.validation_utils import CreateDatasetReq, UpdateDatasetReq, validate_and_parse_json_request | |||
| from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request | |||
| @manager.route("/datasets", methods=["POST"]) # noqa: F821 | |||
| @@ -190,72 +188,85 @@ def delete(tenant_id): | |||
| required: true | |||
| schema: | |||
| type: object | |||
| required: | |||
| - ids | |||
| properties: | |||
| ids: | |||
| type: array | |||
| type: array or null | |||
| items: | |||
| type: string | |||
| description: List of dataset IDs to delete. | |||
| description: | | |||
| Specifies the datasets to delete: | |||
| - If `null`, all datasets will be deleted. | |||
| - If an array of IDs, only the specified datasets will be deleted. | |||
| - If an empty array, no datasets will be deleted. | |||
| responses: | |||
| 200: | |||
| description: Successful operation. | |||
| schema: | |||
| type: object | |||
| """ | |||
| req, err = validate_and_parse_json_request(request, DeleteDatasetReq) | |||
| if err is not None: | |||
| return get_error_argument_result(err) | |||
| errors = [] | |||
| success_count = 0 | |||
| req = request.json | |||
| if not req: | |||
| ids = None | |||
| else: | |||
| ids = req.get("ids") | |||
| if not ids: | |||
| id_list = [] | |||
| kbs = KnowledgebaseService.query(tenant_id=tenant_id) | |||
| for kb in kbs: | |||
| id_list.append(kb.id) | |||
| kb_id_instance_pairs = [] | |||
| if req["ids"] is None: | |||
| try: | |||
| kbs = KnowledgebaseService.query(tenant_id=tenant_id) | |||
| for kb in kbs: | |||
| kb_id_instance_pairs.append((kb.id, kb)) | |||
| except OperationalError as e: | |||
| logging.exception(e) | |||
| return get_error_data_result(message="Database operation failed") | |||
| else: | |||
| id_list = ids | |||
| unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset") | |||
| id_list = unique_id_list | |||
| error_kb_ids = [] | |||
| for kb_id in req["ids"]: | |||
| try: | |||
| kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id) | |||
| if kb is None: | |||
| error_kb_ids.append(kb_id) | |||
| continue | |||
| kb_id_instance_pairs.append((kb_id, kb)) | |||
| except OperationalError as e: | |||
| logging.exception(e) | |||
| return get_error_data_result(message="Database operation failed") | |||
| if len(error_kb_ids) > 0: | |||
| return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""") | |||
| for id in id_list: | |||
| kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id) | |||
| if not kbs: | |||
| errors.append(f"You don't own the dataset {id}") | |||
| continue | |||
| for doc in DocumentService.query(kb_id=id): | |||
| if not DocumentService.remove_document(doc, tenant_id): | |||
| errors.append(f"Remove document error for dataset {id}") | |||
| errors = [] | |||
| success_count = 0 | |||
| for kb_id, kb in kb_id_instance_pairs: | |||
| try: | |||
| for doc in DocumentService.query(kb_id=kb_id): | |||
| if not DocumentService.remove_document(doc, tenant_id): | |||
| errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'") | |||
| continue | |||
| f2d = File2DocumentService.get_by_document_id(doc.id) | |||
| FileService.filter_delete( | |||
| [ | |||
| File.source_type == FileSource.KNOWLEDGEBASE, | |||
| File.id == f2d[0].file_id, | |||
| ] | |||
| ) | |||
| File2DocumentService.delete_by_document_id(doc.id) | |||
| FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name]) | |||
| if not KnowledgebaseService.delete_by_id(kb_id): | |||
| errors.append(f"Delete dataset error for {kb_id}") | |||
| continue | |||
| f2d = File2DocumentService.get_by_document_id(doc.id) | |||
| FileService.filter_delete( | |||
| [ | |||
| File.source_type == FileSource.KNOWLEDGEBASE, | |||
| File.id == f2d[0].file_id, | |||
| ] | |||
| ) | |||
| File2DocumentService.delete_by_document_id(doc.id) | |||
| FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name]) | |||
| if not KnowledgebaseService.delete_by_id(id): | |||
| errors.append(f"Delete dataset error for {id}") | |||
| continue | |||
| success_count += 1 | |||
| if errors: | |||
| if success_count > 0: | |||
| return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} datasets with {len(errors)} errors") | |||
| else: | |||
| return get_error_data_result(message="; ".join(errors)) | |||
| if duplicate_messages: | |||
| if success_count > 0: | |||
| return get_result( | |||
| message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors", | |||
| data={"success_count": success_count, "errors": duplicate_messages}, | |||
| ) | |||
| else: | |||
| return get_error_data_result(message=";".join(duplicate_messages)) | |||
| return get_result(code=settings.RetCode.SUCCESS) | |||
| success_count += 1 | |||
| except OperationalError as e: | |||
| logging.exception(e) | |||
| return get_error_data_result(message="Database operation failed") | |||
| if not errors: | |||
| return get_result() | |||
| error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..." | |||
| if success_count == 0: | |||
| return get_error_data_result(message=error_message) | |||
| return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message) | |||
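To make the partial-failure branch above concrete, here is a hedged sketch of the response it returns. The IDs and error text are hypothetical; the `code`/`message`/`data` envelope matches what the tests below assert on.

```python
# Hypothetical response after deleting three datasets where one document
# removal failed: "errors" is capped at five entries and the "Details"
# portion of the message is truncated to 128 characters by the code above.
partial_failure_response = {
    "code": 0,  # assumed default success code returned by get_result()
    "data": {
        "success_count": 2,
        "errors": ["Remove document 'doc_1' error for dataset 'kb_3'"],  # hypothetical IDs
    },
    "message": "Successfully deleted 2 datasets, 1 failed. Details: Remove document 'doc_1' error for dataset 'kb_3'...",
}
```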
| @manager.route("/datasets/<dataset_id>", methods=["PUT"]) # noqa: F821 | |||
| @@ -373,7 +384,7 @@ def update(tenant_id, dataset_id): | |||
| logging.exception(e) | |||
| return get_error_data_result(message="Database operation failed") | |||
| return get_result(code=settings.RetCode.SUCCESS) | |||
| return get_result() | |||
| @manager.route("/datasets", methods=["GET"]) # noqa: F821 | |||
| @@ -14,11 +14,13 @@ | |||
| # limitations under the License. | |||
| # | |||
| import uuid | |||
| from collections import Counter | |||
| from enum import auto | |||
| from typing import Annotated, Any | |||
| from flask import Request | |||
| from pydantic import UUID1, BaseModel, Field, StringConstraints, ValidationError, field_serializer, field_validator | |||
| from pydantic_core import PydanticCustomError | |||
| from strenum import StrEnum | |||
| from werkzeug.exceptions import BadRequest, UnsupportedMediaType | |||
| @@ -238,7 +240,7 @@ class CreateDatasetReq(Base): | |||
| str: Validated Base64 string | |||
| Raises: | |||
| ValueError: For structural errors in these cases: | |||
| PydanticCustomError: For structural errors in these cases: | |||
| - Missing MIME prefix header | |||
| - Invalid MIME prefix format | |||
| - Unsupported image MIME type | |||
| @@ -259,16 +261,16 @@ class CreateDatasetReq(Base): | |||
| if "," in v: | |||
| prefix, _ = v.split(",", 1) | |||
| if not prefix.startswith("data:"): | |||
| raise ValueError("Invalid MIME prefix format. Must start with 'data:'") | |||
| raise PydanticCustomError("format_invalid", "Invalid MIME prefix format. Must start with 'data:'") | |||
| mime_type = prefix[5:].split(";")[0] | |||
| supported_mime_types = ["image/jpeg", "image/png"] | |||
| if mime_type not in supported_mime_types: | |||
| raise ValueError(f"Unsupported MIME type. Allowed: {supported_mime_types}") | |||
| raise PydanticCustomError("format_invalid", "Unsupported MIME type. Allowed: {supported_mime_types}", {"supported_mime_types": supported_mime_types}) | |||
| return v | |||
| else: | |||
| raise ValueError("Missing MIME prefix. Expected format: data:<mime>;base64,<data>") | |||
| raise PydanticCustomError("format_invalid", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>") | |||
| @field_validator("embedding_model", mode="after") | |||
| @classmethod | |||
| @@ -288,7 +290,7 @@ class CreateDatasetReq(Base): | |||
| str: Validated <model_name>@<provider> format | |||
| Raises: | |||
| ValueError: For these violations: | |||
| PydanticCustomError: For these violations: | |||
| - Missing @ separator | |||
| - Empty model_name/provider | |||
| - Invalid component structure | |||
| @@ -300,15 +302,15 @@ class CreateDatasetReq(Base): | |||
| Invalid: "text-embedding-3-large@" (empty provider) | |||
| """ | |||
| if "@" not in v: | |||
| raise ValueError("Embedding model identifier must follow <model_name>@<provider> format") | |||
| raise PydanticCustomError("format_invalid", "Embedding model identifier must follow <model_name>@<provider> format") | |||
| components = v.split("@", 1) | |||
| if len(components) != 2 or not all(components): | |||
| raise ValueError("Both model_name and provider must be non-empty strings") | |||
| raise PydanticCustomError("format_invalid", "Both model_name and provider must be non-empty strings") | |||
| model_name, provider = components | |||
| if not model_name.strip() or not provider.strip(): | |||
| raise ValueError("Model name and provider cannot be whitespace-only strings") | |||
| raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings") | |||
| return v | |||
| @field_validator("permission", mode="before") | |||
| @@ -374,13 +376,13 @@ class CreateDatasetReq(Base): | |||
| ParserConfig | None: Validated configuration object | |||
| Raises: | |||
| ValueError: When serialized JSON exceeds 65,535 characters | |||
| PydanticCustomError: When serialized JSON exceeds 65,535 characters | |||
| """ | |||
| if v is None: | |||
| return None | |||
| if (json_str := v.model_dump_json()) and len(json_str) > 65535: | |||
| raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}") | |||
| raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)}) | |||
| return v | |||
| @@ -390,4 +392,88 @@ class UpdateDatasetReq(CreateDatasetReq): | |||
| @field_serializer("dataset_id") | |||
| def serialize_uuid_to_hex(self, v: uuid.UUID) -> str: | |||
| """ | |||
| Serializes a UUID version 1 object to its hexadecimal string representation. | |||
| This field serializer specifically handles UUID version 1 objects, converting them | |||
| to their canonical 32-character hexadecimal format without hyphens. The conversion | |||
| is designed for consistent serialization in API responses and database storage. | |||
| Args: | |||
| v (uuid.UUID1): The UUID version 1 object to serialize. Must be a valid | |||
| UUID1 instance generated by Python's uuid module. | |||
| Returns: | |||
| str: 32-character lowercase hexadecimal string representation | |||
| Example: "550e8400e29b41d4a716446655440000" | |||
| Raises: | |||
| AttributeError: If input is not a proper UUID object (missing hex attribute) | |||
| TypeError: If input is not a UUID1 instance (when type checking is enabled) | |||
| Notes: | |||
| - Version 1 UUIDs contain timestamp and MAC address information | |||
| - The .hex property automatically converts to lowercase hexadecimal | |||
| - For cross-version compatibility, consider typing as uuid.UUID instead | |||
| """ | |||
| return v.hex | |||
| class DeleteReq(Base): | |||
| ids: list[UUID1] | None = Field(...) | |||
| @field_validator("ids", mode="after") | |||
| def check_duplicate_ids(cls, v: list[UUID1] | None) -> list[str] | None: | |||
| """ | |||
| Validates and converts a list of UUID1 objects to hexadecimal strings while checking for duplicates. | |||
| This validator implements a three-stage processing pipeline: | |||
| 1. Null Handling - returns None for empty/null input | |||
| 2. UUID Conversion - transforms UUID objects to hex strings | |||
| 3. Duplicate Validation - ensures all IDs are unique | |||
| Behavior Specifications: | |||
| - Input: None → Returns None (indicates no operation) | |||
| - Input: [] → Returns [] (empty list for explicit no-op) | |||
| - Input: [UUID1,...] → Returns validated hex strings | |||
| - Duplicates: Raises formatted PydanticCustomError | |||
| Args: | |||
| v (list[UUID1] | None): | |||
| - None: Indicates no datasets should be processed | |||
| - Empty list: Explicit empty operation | |||
| - Populated list: Dataset UUIDs to validate/convert | |||
| Returns: | |||
| list[str] | None: | |||
| - None when input is None | |||
| - List of 32-character hex strings (lowercase, no hyphens) | |||
| Example: ["550e8400e29b41d4a716446655440000"] | |||
| Raises: | |||
| PydanticCustomError: When duplicates detected, containing: | |||
| - Error type: "duplicate_uuids" | |||
| - Template message: "Duplicate ids: '{duplicate_ids}'" | |||
| - Context: {"duplicate_ids": "id1, id2, ..."} | |||
| Example: | |||
| >>> validate([UUID("..."), UUID("...")]) | |||
| ["2cdf0456e9a711ee8000000000000000", ...] | |||
| >>> validate([UUID("..."), UUID("...")]) # Duplicates | |||
| PydanticCustomError: Duplicate ids: '2cdf0456e9a711ee8000000000000000' | |||
| """ | |||
| if not v: | |||
| return v | |||
| uuid_hex_list = [ids.hex for ids in v] | |||
| duplicates = [item for item, count in Counter(uuid_hex_list).items() if count > 1] | |||
| if duplicates: | |||
| duplicates_str = ", ".join(duplicates) | |||
| raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": duplicates_str}) | |||
| return uuid_hex_list | |||
| class DeleteDatasetReq(DeleteReq): ... | |||
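A minimal usage sketch of the new request model (the import path comes from the endpoint's imports above; the sample IDs mirror the ones used in the docs below):

```python
from pydantic import ValidationError

from api.utils.validation_utils import DeleteDatasetReq

# null -> "delete everything": the validator passes None through unchanged.
assert DeleteDatasetReq(ids=None).ids is None

# Valid UUID1 strings are parsed, then re-emitted as 32-character hex strings.
req = DeleteDatasetReq(ids=["d94a8dc0-2c97-11f0-930f-7fbc369eab6d"])
assert req.ids == ["d94a8dc02c9711f0930f7fbc369eab6d"]

# Duplicate IDs are rejected with the custom "duplicate_uuids" error type.
try:
    DeleteDatasetReq(ids=["d94a8dc02c9711f0930f7fbc369eab6d"] * 2)
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # duplicate_uuids
```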
| @@ -507,7 +507,7 @@ Deletes datasets by ID. | |||
| - `'content-Type: application/json'` | |||
| - `'Authorization: Bearer <YOUR_API_KEY>'` | |||
| - Body: | |||
| - `"ids"`: `list[string]` | |||
| - `"ids"`: `list[string]` or `null` | |||
| ##### Request example | |||
| @@ -517,14 +517,17 @@ curl --request DELETE \ | |||
| --header 'Content-Type: application/json' \ | |||
| --header 'Authorization: Bearer <YOUR_API_KEY>' \ | |||
| --data '{ | |||
| "ids": ["test_1", "test_2"] | |||
| "ids": ["d94a8dc02c9711f0930f7fbc369eab6d", "e94a8dc02c9711f0930f7fbc369eab6e"] | |||
| }' | |||
| ``` | |||
| ##### Request parameters | |||
| - `"ids"`: (*Body parameter*), `list[string]` | |||
| The IDs of the datasets to delete. If it is not specified, all datasets will be deleted. | |||
| - `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required* | |||
| Specifies the datasets to delete (see the sketch after this list): | |||
| - If `null`, all datasets will be deleted. | |||
| - If an array of IDs, only the specified datasets will be deleted. | |||
| - If an empty array, no datasets will be deleted. | |||
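For illustration, a hedged Python sketch of the three request shapes described above. The `requests` library, base URL, and path prefix are assumptions, since the curl example's URL is elided; substitute your server address and API key.

```python
import requests  # assumption: any HTTP client works; curl is shown above

BASE_URL = "http://<ragflow-address>/api/v1"  # hypothetical; use your server's actual URL
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <YOUR_API_KEY>",
}

# Delete only the listed datasets.
requests.delete(f"{BASE_URL}/datasets", headers=HEADERS,
                json={"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]})

# Delete every dataset owned by the caller.
requests.delete(f"{BASE_URL}/datasets", headers=HEADERS, json={"ids": None})

# Explicit no-op: an empty list deletes nothing.
requests.delete(f"{BASE_URL}/datasets", headers=HEADERS, json={"ids": []})
```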
| #### Response | |||
| @@ -200,16 +200,19 @@ dataset = rag_object.create_dataset(name="kb_1") | |||
| ### Delete datasets | |||
| ```python | |||
| RAGFlow.delete_datasets(ids: list[str] = None) | |||
| RAGFlow.delete_datasets(ids: list[str] | None = None) | |||
| ``` | |||
| Deletes datasets by ID. | |||
| #### Parameters | |||
| ##### ids: `list[str]`, *Required* | |||
| ##### ids: `list[str]` or `None`, *Required* | |||
| The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted. | |||
| The IDs of the datasets to delete. Defaults to `None`. | |||
| - If `None`, all datasets will be deleted. | |||
| - If an array of IDs, only the specified datasets will be deleted. | |||
| - If an empty array, no datasets will be deleted. | |||
| #### Returns | |||
| @@ -219,7 +222,7 @@ The IDs of the datasets to delete. Defaults to `None`. If it is not specified, a | |||
| #### Examples | |||
| ```python | |||
| rag_object.delete_datasets(ids=["id_1","id_2"]) | |||
| rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"]) | |||
| ``` | |||
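As a hedged companion to the example above, the `None` and empty-list cases described in the parameter notes (`rag_object` is the `RAGFlow` client instance from the earlier sections):

```python
# Delete every dataset owned by the caller.
rag_object.delete_datasets(ids=None)

# Explicit no-op: an empty list deletes nothing.
rag_object.delete_datasets(ids=[])
```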
| --- | |||
| @@ -76,7 +76,7 @@ def condition(_auth, _dataset_id): | |||
| @pytest.fixture(scope="function") | |||
| def clear_datasets(request, get_http_api_auth): | |||
| def cleanup(): | |||
| delete_datasets(get_http_api_auth) | |||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||
| request.addfinalizer(cleanup) | |||
| @@ -132,7 +132,7 @@ def ragflow_tmp_dir(request, tmp_path_factory): | |||
| @pytest.fixture(scope="class") | |||
| def add_dataset(request, get_http_api_auth): | |||
| def cleanup(): | |||
| delete_datasets(get_http_api_auth) | |||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||
| request.addfinalizer(cleanup) | |||
| @@ -143,12 +143,11 @@ def add_dataset(request, get_http_api_auth): | |||
| @pytest.fixture(scope="function") | |||
| def add_dataset_func(request, get_http_api_auth): | |||
| def cleanup(): | |||
| delete_datasets(get_http_api_auth) | |||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||
| request.addfinalizer(cleanup) | |||
| dataset_ids = batch_create_datasets(get_http_api_auth, 1) | |||
| return dataset_ids[0] | |||
| return batch_create_datasets(get_http_api_auth, 1)[0] | |||
| @pytest.fixture(scope="class") | |||
| @@ -22,7 +22,7 @@ from common import batch_create_datasets, delete_datasets | |||
| @pytest.fixture(scope="class") | |||
| def add_datasets(get_http_api_auth, request): | |||
| def cleanup(): | |||
| delete_datasets(get_http_api_auth) | |||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||
| request.addfinalizer(cleanup) | |||
| @@ -32,18 +32,8 @@ def add_datasets(get_http_api_auth, request): | |||
| @pytest.fixture(scope="function") | |||
| def add_datasets_func(get_http_api_auth, request): | |||
| def cleanup(): | |||
| delete_datasets(get_http_api_auth) | |||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||
| request.addfinalizer(cleanup) | |||
| return batch_create_datasets(get_http_api_auth, 3) | |||
| @pytest.fixture(scope="function") | |||
| def add_dataset_func(get_http_api_auth, request): | |||
| def cleanup(): | |||
| delete_datasets(get_http_api_auth) | |||
| request.addfinalizer(cleanup) | |||
| return batch_create_datasets(get_http_api_auth, 1)[0] | |||
| @@ -25,8 +25,8 @@ from common import ( | |||
| from libs.auth import RAGFlowHttpApiAuth | |||
| @pytest.mark.p1 | |||
| class TestAuthorization: | |||
| @pytest.mark.p1 | |||
| @pytest.mark.parametrize( | |||
| "auth, expected_code, expected_message", | |||
| [ | |||
| @@ -38,104 +38,173 @@ class TestAuthorization: | |||
| ), | |||
| ], | |||
| ) | |||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||
| def test_auth_invalid(self, auth, expected_code, expected_message): | |||
| res = delete_datasets(auth) | |||
| assert res["code"] == expected_code | |||
| assert res["message"] == expected_message | |||
| class TestDatasetsDeletion: | |||
| class TestRquest: | |||
| @pytest.mark.p3 | |||
| def test_content_type_bad(self, get_http_api_auth): | |||
| BAD_CONTENT_TYPE = "text/xml" | |||
| res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE}) | |||
| assert res["code"] == 101, res | |||
| assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res | |||
| @pytest.mark.p3 | |||
| @pytest.mark.parametrize( | |||
| "payload, expected_message", | |||
| [ | |||
| ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"), | |||
| ('"a"', "Invalid request payload: expected object, got str"), | |||
| ], | |||
| ids=["malformed_json_syntax", "invalid_request_payload_type"], | |||
| ) | |||
| def test_payload_bad(self, get_http_api_auth, payload, expected_message): | |||
| res = delete_datasets(get_http_api_auth, data=payload) | |||
| assert res["code"] == 101, res | |||
| assert res["message"] == expected_message, res | |||
| @pytest.mark.p3 | |||
| def test_payload_unset(self, get_http_api_auth): | |||
| res = delete_datasets(get_http_api_auth, None) | |||
| assert res["code"] == 101, res | |||
| assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res | |||
| class TestCapability: | |||
| @pytest.mark.p3 | |||
| def test_delete_dataset_1k(self, get_http_api_auth): | |||
| ids = batch_create_datasets(get_http_api_auth, 1_000) | |||
| res = delete_datasets(get_http_api_auth, {"ids": ids}) | |||
| assert res["code"] == 0, res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 0, res | |||
| @pytest.mark.p3 | |||
| def test_concurrent_deletion(self, get_http_api_auth): | |||
| dataset_num = 1_000 | |||
| ids = batch_create_datasets(get_http_api_auth, dataset_num) | |||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||
| futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)] | |||
| responses = [f.result() for f in futures] | |||
| assert all(r["code"] == 0 for r in responses), responses | |||
| class TestDatasetsDelete: | |||
| @pytest.mark.p1 | |||
| @pytest.mark.parametrize( | |||
| "payload, expected_code, expected_message, remaining", | |||
| "func, expected_code, expected_message, remaining", | |||
| [ | |||
| (None, 0, "", 0), | |||
| ({"ids": []}, 0, "", 0), | |||
| ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3), | |||
| ( | |||
| {"ids": ["\n!?。;!?\"'"]}, | |||
| 102, | |||
| "You don't own the dataset \n!?。;!?\"'", | |||
| 3, | |||
| ), | |||
| ( | |||
| "not json", | |||
| 100, | |||
| "AttributeError(\"'str' object has no attribute 'get'\")", | |||
| 3, | |||
| ), | |||
| (lambda r: {"ids": r[:1]}, 0, "", 2), | |||
| (lambda r: {"ids": r}, 0, "", 0), | |||
| ], | |||
| ids=["single_dataset", "multiple_datasets"], | |||
| ) | |||
| def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining): | |||
| def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining): | |||
| dataset_ids = add_datasets_func | |||
| if callable(payload): | |||
| payload = payload(dataset_ids) | |||
| if callable(func): | |||
| payload = func(dataset_ids) | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == expected_code | |||
| if res["code"] != 0: | |||
| assert res["message"] == expected_message | |||
| assert res["code"] == expected_code, res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == remaining, res | |||
| @pytest.mark.p1 | |||
| @pytest.mark.usefixtures("add_dataset_func") | |||
| def test_ids_empty(self, get_http_api_auth): | |||
| payload = {"ids": []} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 0, res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 1, res | |||
| @pytest.mark.p1 | |||
| @pytest.mark.usefixtures("add_datasets_func") | |||
| def test_ids_none(self, get_http_api_auth): | |||
| payload = {"ids": None} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 0, res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 0, res | |||
| @pytest.mark.p2 | |||
| @pytest.mark.usefixtures("add_dataset_func") | |||
| def test_id_not_uuid(self, get_http_api_auth): | |||
| payload = {"ids": ["not_uuid"]} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 101, res | |||
| assert "Input should be a valid UUID" in res["message"], res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 1, res | |||
| @pytest.mark.p2 | |||
| @pytest.mark.usefixtures("add_dataset_func") | |||
| def test_id_wrong_uuid(self, get_http_api_auth): | |||
| payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 102, res | |||
| assert "lacks permission for dataset" in res["message"], res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == remaining | |||
| assert len(res["data"]) == 1, res | |||
| @pytest.mark.p2 | |||
| @pytest.mark.parametrize( | |||
| "payload", | |||
| "func", | |||
| [ | |||
| lambda r: {"ids": ["invalid_id"] + r}, | |||
| lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]}, | |||
| lambda r: {"ids": r + ["invalid_id"]}, | |||
| lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r}, | |||
| lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]}, | |||
| lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]}, | |||
| ], | |||
| ) | |||
| def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload): | |||
| def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func): | |||
| dataset_ids = add_datasets_func | |||
| if callable(payload): | |||
| payload = payload(dataset_ids) | |||
| if callable(func): | |||
| payload = func(dataset_ids) | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 0 | |||
| assert res["data"]["errors"][0] == "You don't own the dataset invalid_id" | |||
| assert res["data"]["success_count"] == 3 | |||
| assert res["code"] == 102, res | |||
| assert "lacks permission for dataset" in res["message"], res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 0 | |||
| assert len(res["data"]) == 3, res | |||
| @pytest.mark.p2 | |||
| def test_repeated_deletion(self, get_http_api_auth, add_datasets_func): | |||
| def test_ids_duplicate(self, get_http_api_auth, add_datasets_func): | |||
| dataset_ids = add_datasets_func | |||
| res = delete_datasets(get_http_api_auth, {"ids": dataset_ids}) | |||
| assert res["code"] == 0 | |||
| payload = {"ids": dataset_ids + dataset_ids} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 101, res | |||
| assert "Duplicate ids:" in res["message"], res | |||
| res = delete_datasets(get_http_api_auth, {"ids": dataset_ids}) | |||
| assert res["code"] == 102 | |||
| assert "You don't own the dataset" in res["message"] | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 3, res | |||
| @pytest.mark.p2 | |||
| def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func): | |||
| def test_repeated_delete(self, get_http_api_auth, add_datasets_func): | |||
| dataset_ids = add_datasets_func | |||
| res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids}) | |||
| assert res["code"] == 0 | |||
| assert "Duplicate dataset ids" in res["data"]["errors"][0] | |||
| assert res["data"]["success_count"] == 3 | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 0 | |||
| @pytest.mark.p3 | |||
| def test_concurrent_deletion(self, get_http_api_auth): | |||
| ids = batch_create_datasets(get_http_api_auth, 100) | |||
| payload = {"ids": dataset_ids} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 0, res | |||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||
| futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)] | |||
| responses = [f.result() for f in futures] | |||
| assert all(r["code"] == 0 for r in responses) | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 102, res | |||
| assert "lacks permission for dataset" in res["message"], res | |||
| @pytest.mark.p3 | |||
| def test_delete_10k(self, get_http_api_auth): | |||
| ids = batch_create_datasets(get_http_api_auth, 10_000) | |||
| res = delete_datasets(get_http_api_auth, {"ids": ids}) | |||
| assert res["code"] == 0 | |||
| @pytest.mark.p2 | |||
| @pytest.mark.usefixtures("add_dataset_func") | |||
| def test_field_unsupported(self, get_http_api_auth): | |||
| payload = {"unknown_field": "unknown_field"} | |||
| res = delete_datasets(get_http_api_auth, payload) | |||
| assert res["code"] == 101, res | |||
| assert "Extra inputs are not permitted" in res["message"], res | |||
| res = list_datasets(get_http_api_auth) | |||
| assert len(res["data"]) == 0 | |||
| assert len(res["data"]) == 1, res | |||
| @@ -77,6 +77,13 @@ class TestRquest: | |||
| assert res["code"] == 101, res | |||
| assert res["message"] == "No properties were modified", res | |||
| @pytest.mark.p3 | |||
| def test_payload_unset(self, get_http_api_auth, add_dataset_func): | |||
| dataset_id = add_dataset_func | |||
| res = update_dataset(get_http_api_auth, dataset_id, None) | |||
| assert res["code"] == 101, res | |||
| assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res | |||
| class TestCapability: | |||
| @pytest.mark.p3 | |||