### What problem does this PR solve?

This PR introduces Pydantic-based validation for the delete dataset HTTP API, improving code clarity and robustness. Key changes include:

1. Pydantic Validation: the request body is now parsed into a `DeleteDatasetReq` model via `validate_and_parse_json_request` instead of being read ad hoc from `request.json`.
2. Error Handling: database failures (`OperationalError`) return a consistent error result, and permission and duplicate-ID problems are reported with specific messages.
3. Test Updates: HTTP API tests and fixtures are updated to the new request contract.
4. Documentation Updates: the HTTP and Python API references now document the `ids` semantics (`null`, empty list, or a list of IDs).

### Type of change

- [x] Documentation Update
- [x] Refactoring
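In short, the delete handler now validates the request body against a Pydantic model before doing any database work (excerpted from the diff below):

```python
req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
if err is not None:
    return get_error_argument_result(err)
```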
```diff
 from flask import request
 from peewee import OperationalError
-from api import settings
 from api.db import FileSource, StatusEnum
 from api.db.db_models import File
 from api.db.services.document_service import DocumentService
 from api.db.services.user_service import TenantService
 from api.utils import get_uuid
 from api.utils.api_utils import (
-    check_duplicate_ids,
     deep_merge,
     get_error_argument_result,
     get_error_data_result,
     token_required,
     verify_embedding_availability,
 )
-from api.utils.validation_utils import CreateDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
+from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
 @manager.route("/datasets", methods=["POST"])  # noqa: F821
         required: true
         schema:
           type: object
+          required:
+            - ids
           properties:
             ids:
-              type: array
+              type: array or null
               items:
                 type: string
-              description: List of dataset IDs to delete.
+              description: |
+                Specifies the datasets to delete:
+                - If `null`, all datasets will be deleted.
+                - If an array of IDs, only the specified datasets will be deleted.
+                - If an empty array, no datasets will be deleted.
     responses:
         200:
             description: Successful operation.
             schema:
               type: object
     """
+    req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
+    if err is not None:
+        return get_error_argument_result(err)
-    errors = []
-    success_count = 0
-    req = request.json
-    if not req:
-        ids = None
-    else:
-        ids = req.get("ids")
-    if not ids:
-        id_list = []
-        kbs = KnowledgebaseService.query(tenant_id=tenant_id)
-        for kb in kbs:
-            id_list.append(kb.id)
+    kb_id_instance_pairs = []
+    if req["ids"] is None:
+        try:
+            kbs = KnowledgebaseService.query(tenant_id=tenant_id)
+            for kb in kbs:
+                kb_id_instance_pairs.append((kb.id, kb))
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
     else:
-        id_list = ids
-        unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset")
-        id_list = unique_id_list
+        error_kb_ids = []
+        for kb_id in req["ids"]:
+            try:
+                kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
+                if kb is None:
+                    error_kb_ids.append(kb_id)
+                    continue
+                kb_id_instance_pairs.append((kb_id, kb))
+            except OperationalError as e:
+                logging.exception(e)
+                return get_error_data_result(message="Database operation failed")
+        if len(error_kb_ids) > 0:
+            return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
-    for id in id_list:
-        kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
-        if not kbs:
-            errors.append(f"You don't own the dataset {id}")
-            continue
-        for doc in DocumentService.query(kb_id=id):
-            if not DocumentService.remove_document(doc, tenant_id):
-                errors.append(f"Remove document error for dataset {id}")
+    errors = []
+    success_count = 0
+    for kb_id, kb in kb_id_instance_pairs:
+        try:
+            for doc in DocumentService.query(kb_id=kb_id):
+                if not DocumentService.remove_document(doc, tenant_id):
+                    errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
+                    continue
+                f2d = File2DocumentService.get_by_document_id(doc.id)
+                FileService.filter_delete(
+                    [
+                        File.source_type == FileSource.KNOWLEDGEBASE,
+                        File.id == f2d[0].file_id,
+                    ]
+                )
+                File2DocumentService.delete_by_document_id(doc.id)
+            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
+            if not KnowledgebaseService.delete_by_id(kb_id):
+                errors.append(f"Delete dataset error for {kb_id}")
                 continue
-            f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete(
-                [
-                    File.source_type == FileSource.KNOWLEDGEBASE,
-                    File.id == f2d[0].file_id,
-                ]
-            )
-            File2DocumentService.delete_by_document_id(doc.id)
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
-        if not KnowledgebaseService.delete_by_id(id):
-            errors.append(f"Delete dataset error for {id}")
-            continue
-        success_count += 1
-    if errors:
-        if success_count > 0:
-            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} datasets with {len(errors)} errors")
-        else:
-            return get_error_data_result(message="; ".join(errors))
-    if duplicate_messages:
-        if success_count > 0:
-            return get_result(
-                message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors",
-                data={"success_count": success_count, "errors": duplicate_messages},
-            )
-        else:
-            return get_error_data_result(message=";".join(duplicate_messages))
-    return get_result(code=settings.RetCode.SUCCESS)
+            success_count += 1
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
+    if not errors:
+        return get_result()
+    error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
+    if success_count == 0:
+        return get_error_data_result(message=error_message)
+    return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)
 @manager.route("/datasets/<dataset_id>", methods=["PUT"])  # noqa: F821
         logging.exception(e)
         return get_error_data_result(message="Database operation failed")
-    return get_result(code=settings.RetCode.SUCCESS)
+    return get_result()
 @manager.route("/datasets", methods=["GET"])  # noqa: F821
```
```diff
 # limitations under the License.
 #
 import uuid
+from collections import Counter
 from enum import auto
 from typing import Annotated, Any
 from flask import Request
 from pydantic import UUID1, BaseModel, Field, StringConstraints, ValidationError, field_serializer, field_validator
+from pydantic_core import PydanticCustomError
 from strenum import StrEnum
 from werkzeug.exceptions import BadRequest, UnsupportedMediaType
             str: Validated Base64 string
         Raises:
-            ValueError: For structural errors in these cases:
+            PydanticCustomError: For structural errors in these cases:
             - Missing MIME prefix header
             - Invalid MIME prefix format
             - Unsupported image MIME type
         if "," in v:
             prefix, _ = v.split(",", 1)
             if not prefix.startswith("data:"):
-                raise ValueError("Invalid MIME prefix format. Must start with 'data:'")
+                raise PydanticCustomError("format_invalid", "Invalid MIME prefix format. Must start with 'data:'")
             mime_type = prefix[5:].split(";")[0]
             supported_mime_types = ["image/jpeg", "image/png"]
             if mime_type not in supported_mime_types:
-                raise ValueError(f"Unsupported MIME type. Allowed: {supported_mime_types}")
+                raise PydanticCustomError("format_invalid", "Unsupported MIME type. Allowed: {supported_mime_types}", {"supported_mime_types": supported_mime_types})
             return v
         else:
-            raise ValueError("Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
+            raise PydanticCustomError("format_invalid", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
     @field_validator("embedding_model", mode="after")
     @classmethod
             str: Validated <model_name>@<provider> format
         Raises:
-            ValueError: For these violations:
+            PydanticCustomError: For these violations:
             - Missing @ separator
             - Empty model_name/provider
             - Invalid component structure
            Invalid: "text-embedding-3-large@" (empty provider)
        """
        if "@" not in v:
-            raise ValueError("Embedding model identifier must follow <model_name>@<provider> format")
+            raise PydanticCustomError("format_invalid", "Embedding model identifier must follow <model_name>@<provider> format")
        components = v.split("@", 1)
        if len(components) != 2 or not all(components):
-            raise ValueError("Both model_name and provider must be non-empty strings")
+            raise PydanticCustomError("format_invalid", "Both model_name and provider must be non-empty strings")
        model_name, provider = components
        if not model_name.strip() or not provider.strip():
-            raise ValueError("Model name and provider cannot be whitespace-only strings")
+            raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings")
        return v
     @field_validator("permission", mode="before")
             ParserConfig | None: Validated configuration object
         Raises:
-            ValueError: When serialized JSON exceeds 65,535 characters
+            PydanticCustomError: When serialized JSON exceeds 65,535 characters
         """
         if v is None:
             return None
         if (json_str := v.model_dump_json()) and len(json_str) > 65535:
-            raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}")
+            raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)})
         return v
     @field_serializer("dataset_id")
     def serialize_uuid_to_hex(self, v: uuid.UUID) -> str:
+        """
+        Serializes a UUID version 1 object to its hexadecimal string representation.
+        This field serializer specifically handles UUID version 1 objects, converting them
+        to their canonical 32-character hexadecimal format without hyphens. The conversion
+        is designed for consistent serialization in API responses and database storage.
+        Args:
+            v (uuid.UUID1): The UUID version 1 object to serialize. Must be a valid
+                UUID1 instance generated by Python's uuid module.
+        Returns:
+            str: 32-character lowercase hexadecimal string representation
+                Example: "550e8400e29b41d4a716446655440000"
+        Raises:
+            AttributeError: If input is not a proper UUID object (missing hex attribute)
+            TypeError: If input is not a UUID1 instance (when type checking is enabled)
+        Notes:
+            - Version 1 UUIDs contain timestamp and MAC address information
+            - The .hex property automatically converts to lowercase hexadecimal
+            - For cross-version compatibility, consider typing as uuid.UUID instead
+        """
         return v.hex
+class DeleteReq(Base):
+    ids: list[UUID1] | None = Field(...)
+    @field_validator("ids", mode="after")
+    def check_duplicate_ids(cls, v: list[UUID1] | None) -> list[str] | None:
+        """
+        Validates and converts a list of UUID1 objects to hexadecimal strings while checking for duplicates.
+        This validator implements a three-stage processing pipeline:
+        1. Null Handling - returns None for empty/null input
+        2. UUID Conversion - transforms UUID objects to hex strings
+        3. Duplicate Validation - ensures all IDs are unique
+        Behavior Specifications:
+        - Input: None → Returns None (indicates no operation)
+        - Input: [] → Returns [] (empty list for explicit no-op)
+        - Input: [UUID1,...] → Returns validated hex strings
+        - Duplicates: Raises formatted PydanticCustomError
+        Args:
+            v (list[UUID1] | None):
+                - None: Indicates no datasets should be processed
+                - Empty list: Explicit empty operation
+                - Populated list: Dataset UUIDs to validate/convert
+        Returns:
+            list[str] | None:
+                - None when input is None
+                - List of 32-character hex strings (lowercase, no hyphens)
+                Example: ["550e8400e29b41d4a716446655440000"]
+        Raises:
+            PydanticCustomError: When duplicates detected, containing:
+                - Error type: "duplicate_uuids"
+                - Template message: "Duplicate ids: '{duplicate_ids}'"
+                - Context: {"duplicate_ids": "id1, id2, ..."}
+        Example:
+            >>> validate([UUID("..."), UUID("...")])
+            ["2cdf0456e9a711ee8000000000000000", ...]
+            >>> validate([UUID("..."), UUID("...")])  # Duplicates
+            PydanticCustomError: Duplicate ids: '2cdf0456e9a711ee8000000000000000'
+        """
+        if not v:
+            return v
+        uuid_hex_list = [ids.hex for ids in v]
+        duplicates = [item for item, count in Counter(uuid_hex_list).items() if count > 1]
+        if duplicates:
+            duplicates_str = ", ".join(duplicates)
+            raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": duplicates_str})
+        return uuid_hex_list
+class DeleteDatasetReq(DeleteReq): ...
```
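For illustration, a minimal sketch of how the new model behaves when exercised directly (this snippet is not part of the diff; the UUID value is the one used in the documentation examples):

```python
from pydantic import ValidationError

from api.utils.validation_utils import DeleteDatasetReq

# Valid UUID1 strings are parsed and then converted to 32-character hex strings
# by the check_duplicate_ids validator shown above.
req = DeleteDatasetReq(ids=["d94a8dc0-2c97-11f0-930f-7fbc369eab6d"])
print(req.ids)  # ['d94a8dc02c9711f0930f7fbc369eab6d']

DeleteDatasetReq(ids=None)  # valid: the handler treats None as "delete all datasets"
DeleteDatasetReq(ids=[])    # valid: explicit no-op, nothing is deleted

try:
    DeleteDatasetReq(ids=["d94a8dc0-2c97-11f0-930f-7fbc369eab6d"] * 2)
except ValidationError as exc:
    # Wraps the "duplicate_uuids" PydanticCustomError raised by the validator.
    print(exc)
```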
````diff
 - `'content-Type: application/json'`
 - `'Authorization: Bearer <YOUR_API_KEY>'`
 - Body:
-  - `"ids"`: `list[string]`
+  - `"ids"`: `list[string]` or `null`
 ##### Request example
      --header 'Content-Type: application/json' \
      --header 'Authorization: Bearer <YOUR_API_KEY>' \
      --data '{
-     "ids": ["test_1", "test_2"]
+     "ids": ["d94a8dc02c9711f0930f7fbc369eab6d", "e94a8dc02c9711f0930f7fbc369eab6e"]
      }'
 ```
 ##### Request parameters
-- `"ids"`: (*Body parameter*), `list[string]`
-  The IDs of the datasets to delete. If it is not specified, all datasets will be deleted.
+- `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required*
+  Specifies the datasets to delete:
+  - If `null`, all datasets will be deleted.
+  - If an array of IDs, only the specified datasets will be deleted.
+  - If an empty array, no datasets will be deleted.
````
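Under the updated contract, deleting every dataset therefore means sending an explicit `null` value rather than omitting the field; for example:

```json
{
  "ids": null
}
```

whereas `{"ids": []}` is accepted as an explicit no-op that deletes nothing.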
````diff
 #### Response
 ### Delete datasets
 ```python
-RAGFlow.delete_datasets(ids: list[str] = None)
+RAGFlow.delete_datasets(ids: list[str] | None = None)
 ```
 Deletes datasets by ID.
 #### Parameters
-##### ids: `list[str]`, *Required*
+##### ids: `list[str]` or `None`, *Required*
-The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted.
+The IDs of the datasets to delete. Defaults to `None`.
+- If `None`, all datasets will be deleted.
+- If an array of IDs, only the specified datasets will be deleted.
+- If an empty array, no datasets will be deleted.
 #### Returns
 #### Examples
 ```python
-rag_object.delete_datasets(ids=["id_1","id_2"])
+rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"])
 ```
````
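Per the parameter description above, the other two cases use the same call shape:

```python
rag_object.delete_datasets(ids=None)  # delete all datasets under the current account
rag_object.delete_datasets(ids=[])    # an empty list is an explicit no-op; nothing is deleted
```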
---
| @pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
| def clear_datasets(request, get_http_api_auth): | def clear_datasets(request, get_http_api_auth): | ||||
| def cleanup(): | def cleanup(): | ||||
| delete_datasets(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def add_dataset(request, get_http_api_auth): | def add_dataset(request, get_http_api_auth): | ||||
| def cleanup(): | def cleanup(): | ||||
| delete_datasets(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| @pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
| def add_dataset_func(request, get_http_api_auth): | def add_dataset_func(request, get_http_api_auth): | ||||
| def cleanup(): | def cleanup(): | ||||
| delete_datasets(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| dataset_ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| return dataset_ids[0] | |||||
| return batch_create_datasets(get_http_api_auth, 1)[0] | |||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") |
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def add_datasets(get_http_api_auth, request): | def add_datasets(get_http_api_auth, request): | ||||
| def cleanup(): | def cleanup(): | ||||
| delete_datasets(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| @pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
| def add_datasets_func(get_http_api_auth, request): | def add_datasets_func(get_http_api_auth, request): | ||||
| def cleanup(): | def cleanup(): | ||||
| delete_datasets(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth, {"ids": None}) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| return batch_create_datasets(get_http_api_auth, 3) | return batch_create_datasets(get_http_api_auth, 3) | ||||
| @pytest.fixture(scope="function") | |||||
| def add_dataset_func(get_http_api_auth, request): | |||||
| def cleanup(): | |||||
| delete_datasets(get_http_api_auth) | |||||
| request.addfinalizer(cleanup) | |||||
| return batch_create_datasets(get_http_api_auth, 1)[0] |
```diff
 from libs.auth import RAGFlowHttpApiAuth
+@pytest.mark.p1
 class TestAuthorization:
-    @pytest.mark.p1
     @pytest.mark.parametrize(
         "auth, expected_code, expected_message",
         [
             ),
         ],
     )
-    def test_invalid_auth(self, auth, expected_code, expected_message):
+    def test_auth_invalid(self, auth, expected_code, expected_message):
         res = delete_datasets(auth)
         assert res["code"] == expected_code
         assert res["message"] == expected_message
-class TestDatasetsDeletion:
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, get_http_api_auth):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "payload, expected_message",
+        [
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
+        ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
+    )
+    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
+        res = delete_datasets(get_http_api_auth, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth):
+        res = delete_datasets(get_http_api_auth, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+class TestCapability:
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, get_http_api_auth):
+        ids = batch_create_datasets(get_http_api_auth, 1_000)
+        res = delete_datasets(get_http_api_auth, {"ids": ids})
+        assert res["code"] == 0, res
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+    @pytest.mark.p3
+    def test_concurrent_deletion(self, get_http_api_auth):
+        dataset_num = 1_000
+        ids = batch_create_datasets(get_http_api_auth, dataset_num)
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
+        responses = [f.result() for f in futures]
+        assert all(r["code"] == 0 for r in responses), responses
+class TestDatasetsDelete:
     @pytest.mark.p1
     @pytest.mark.parametrize(
-        "payload, expected_code, expected_message, remaining",
+        "func, expected_code, expected_message, remaining",
         [
-            (None, 0, "", 0),
-            ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
-            (
-                {"ids": ["\n!?。;!?\"'"]},
-                102,
-                "You don't own the dataset \n!?。;!?\"'",
-                3,
-            ),
-            (
-                "not json",
-                100,
-                "AttributeError(\"'str' object has no attribute 'get'\")",
-                3,
-            ),
             (lambda r: {"ids": r[:1]}, 0, "", 2),
             (lambda r: {"ids": r}, 0, "", 0),
         ],
+        ids=["single_dataset", "multiple_datasets"],
     )
-    def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining):
+    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
         dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
+        if callable(func):
+            payload = func(dataset_ids)
         res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == expected_code
-        if res["code"] != 0:
-            assert res["message"] == expected_message
+        assert res["code"] == expected_code, res
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == remaining, res
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, get_http_api_auth):
+        payload = {"ids": []}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, get_http_api_auth):
+        payload = {"ids": None}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, get_http_api_auth):
+        payload = {"ids": ["not_uuid"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid UUID" in res["message"], res
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, get_http_api_auth):
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
         res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == remaining
+        assert len(res["data"]) == 1, res
     @pytest.mark.p2
     @pytest.mark.parametrize(
-        "payload",
+        "func",
         [
-            lambda r: {"ids": ["invalid_id"] + r},
-            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
-            lambda r: {"ids": r + ["invalid_id"]},
+            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
+            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
+            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
         ],
     )
-    def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload):
+    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
         dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
+        if callable(func):
+            payload = func(dataset_ids)
         res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
         res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == 3, res
     @pytest.mark.p2
-    def test_repeated_deletion(self, get_http_api_auth, add_datasets_func):
+    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
         dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 0
+        payload = {"ids": dataset_ids + dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Duplicate ids:" in res["message"], res
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 102
-        assert "You don't own the dataset" in res["message"]
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
     @pytest.mark.p2
-    def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func):
+    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
         dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids})
-        assert res["code"] == 0
-        assert "Duplicate dataset ids" in res["data"]["errors"][0]
-        assert res["data"]["success_count"] == 3
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
-    @pytest.mark.p3
-    def test_concurrent_deletion(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 100)
+        payload = {"ids": dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
-    @pytest.mark.p3
-    def test_delete_10k(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 10_000)
-        res = delete_datasets(get_http_api_auth, {"ids": ids})
-        assert res["code"] == 0
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, get_http_api_auth):
+        payload = {"unknown_field": "unknown_field"}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
         res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == 1, res
```
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert res["message"] == "No properties were modified", res | assert res["message"] == "No properties were modified", res | ||||
| @pytest.mark.p3 | |||||
| def test_payload_unset(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| res = update_dataset(get_http_api_auth, dataset_id, None) | |||||
| assert res["code"] == 101, res | |||||
| assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res | |||||
| class TestCapability: | class TestCapability: | ||||
| @pytest.mark.p3 | @pytest.mark.p3 |