
Refa: HTTP API delete dataset / test cases / docs (#7657)

### What problem does this PR solve?

This PR introduces Pydantic-based validation for the delete dataset HTTP
API, improving code clarity and robustness. Key changes include:

1. Pydantic Validation: request bodies are now parsed and validated with a `DeleteDatasetReq` model (see the sketch after this list)
2. Error Handling: database failures are caught as `OperationalError` and returned as consistent error results
3. Test Updates: the HTTP API test cases are reorganized around the new validation behavior
4. Documentation Updates: the HTTP and Python API references now document the `null` / empty-list semantics of `ids`
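
A minimal sketch of the new request flow, assembled from the diff below (the route decorator and handler signature are assumptions here, since the excerpt only shows changed lines):

```python
# Schematic only: `manager`, `token_required`, etc. come from the RAGFlow app context.
@manager.route("/datasets", methods=["DELETE"])  # noqa: F821  (assumed route)
@token_required
def delete(tenant_id):
    # DeleteDatasetReq rejects non-UUID ids, duplicate ids, and unknown fields
    # before any database work happens.
    req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
    if err is not None:
        return get_error_argument_result(err)
    # req["ids"] is now None (delete all), [] (no-op), or a list of
    # deduplicated 32-character hex strings.
    ...
```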

### Type of change

- [x] Documentation Update
- [x] Refactoring
tags/v0.19.0
liu an · 5 months ago · parent commit ae8b628f0a

**api/apps/sdk/dataset.py** (+67, -56)

```diff
 from flask import request
 from peewee import OperationalError
 
-from api import settings
 from api.db import FileSource, StatusEnum
 from api.db.db_models import File
 from api.db.services.document_service import DocumentService
 from api.db.services.user_service import TenantService
 from api.utils import get_uuid
 from api.utils.api_utils import (
-    check_duplicate_ids,
     deep_merge,
     get_error_argument_result,
     get_error_data_result,
     token_required,
     verify_embedding_availability,
 )
-from api.utils.validation_utils import CreateDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
+from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
 
 
 @manager.route("/datasets", methods=["POST"])  # noqa: F821
@@ ... @@
         required: true
         schema:
           type: object
+          required:
+            - ids
           properties:
             ids:
-              type: array
+              type: array or null
               items:
                 type: string
-              description: List of dataset IDs to delete.
+              description: |
+                Specifies the datasets to delete:
+                - If `null`, all datasets will be deleted.
+                - If an array of IDs, only the specified datasets will be deleted.
+                - If an empty array, no datasets will be deleted.
     responses:
       200:
         description: Successful operation.
         schema:
           type: object
     """
-    errors = []
-    success_count = 0
-    req = request.json
-    if not req:
-        ids = None
-    else:
-        ids = req.get("ids")
-    if not ids:
-        id_list = []
-        kbs = KnowledgebaseService.query(tenant_id=tenant_id)
-        for kb in kbs:
-            id_list.append(kb.id)
+    req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
+    if err is not None:
+        return get_error_argument_result(err)
+
+    kb_id_instance_pairs = []
+    if req["ids"] is None:
+        try:
+            kbs = KnowledgebaseService.query(tenant_id=tenant_id)
+            for kb in kbs:
+                kb_id_instance_pairs.append((kb.id, kb))
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
     else:
-        id_list = ids
-        unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset")
-        id_list = unique_id_list
-
-    for id in id_list:
-        kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
-        if not kbs:
-            errors.append(f"You don't own the dataset {id}")
-            continue
-        for doc in DocumentService.query(kb_id=id):
-            if not DocumentService.remove_document(doc, tenant_id):
-                errors.append(f"Remove document error for dataset {id}")
-                continue
-            f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete(
-                [
-                    File.source_type == FileSource.KNOWLEDGEBASE,
-                    File.id == f2d[0].file_id,
-                ]
-            )
-            File2DocumentService.delete_by_document_id(doc.id)
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
-        if not KnowledgebaseService.delete_by_id(id):
-            errors.append(f"Delete dataset error for {id}")
-            continue
-        success_count += 1
-    if errors:
-        if success_count > 0:
-            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} datasets with {len(errors)} errors")
-        else:
-            return get_error_data_result(message="; ".join(errors))
-    if duplicate_messages:
-        if success_count > 0:
-            return get_result(
-                message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors",
-                data={"success_count": success_count, "errors": duplicate_messages},
-            )
-        else:
-            return get_error_data_result(message=";".join(duplicate_messages))
-    return get_result(code=settings.RetCode.SUCCESS)
+        error_kb_ids = []
+        for kb_id in req["ids"]:
+            try:
+                kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
+                if kb is None:
+                    error_kb_ids.append(kb_id)
+                    continue
+                kb_id_instance_pairs.append((kb_id, kb))
+            except OperationalError as e:
+                logging.exception(e)
+                return get_error_data_result(message="Database operation failed")
+        if len(error_kb_ids) > 0:
+            return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
+
+    errors = []
+    success_count = 0
+    for kb_id, kb in kb_id_instance_pairs:
+        try:
+            for doc in DocumentService.query(kb_id=kb_id):
+                if not DocumentService.remove_document(doc, tenant_id):
+                    errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
+                    continue
+                f2d = File2DocumentService.get_by_document_id(doc.id)
+                FileService.filter_delete(
+                    [
+                        File.source_type == FileSource.KNOWLEDGEBASE,
+                        File.id == f2d[0].file_id,
+                    ]
+                )
+                File2DocumentService.delete_by_document_id(doc.id)
+            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
+            if not KnowledgebaseService.delete_by_id(kb_id):
+                errors.append(f"Delete dataset error for {kb_id}")
+                continue
+            success_count += 1
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
+
+    if not errors:
+        return get_result()
+
+    error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
+    if success_count == 0:
+        return get_error_data_result(message=error_message)
+
+    return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)
 
 
 @manager.route("/datasets/<dataset_id>", methods=["PUT"])  # noqa: F821
@@ ... @@
         logging.exception(e)
         return get_error_data_result(message="Database operation failed")
 
-    return get_result(code=settings.RetCode.SUCCESS)
+    return get_result()
 
 
 @manager.route("/datasets", methods=["GET"])  # noqa: F821
```

**api/utils/validation_utils.py** (+96, -10)

```diff
 # limitations under the License.
 #
 import uuid
+from collections import Counter
 from enum import auto
 from typing import Annotated, Any
 
 from flask import Request
 from pydantic import UUID1, BaseModel, Field, StringConstraints, ValidationError, field_serializer, field_validator
+from pydantic_core import PydanticCustomError
 from strenum import StrEnum
 from werkzeug.exceptions import BadRequest, UnsupportedMediaType
@@ ... @@
             str: Validated Base64 string
 
         Raises:
-            ValueError: For structural errors in these cases:
+            PydanticCustomError: For structural errors in these cases:
                 - Missing MIME prefix header
                 - Invalid MIME prefix format
                 - Unsupported image MIME type
@@ ... @@
         if "," in v:
             prefix, _ = v.split(",", 1)
             if not prefix.startswith("data:"):
-                raise ValueError("Invalid MIME prefix format. Must start with 'data:'")
+                raise PydanticCustomError("format_invalid", "Invalid MIME prefix format. Must start with 'data:'")
 
             mime_type = prefix[5:].split(";")[0]
             supported_mime_types = ["image/jpeg", "image/png"]
             if mime_type not in supported_mime_types:
-                raise ValueError(f"Unsupported MIME type. Allowed: {supported_mime_types}")
+                raise PydanticCustomError("format_invalid", "Unsupported MIME type. Allowed: {supported_mime_types}", {"supported_mime_types": supported_mime_types})
 
             return v
         else:
-            raise ValueError("Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
+            raise PydanticCustomError("format_invalid", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
 
     @field_validator("embedding_model", mode="after")
     @classmethod
@@ ... @@
             str: Validated <model_name>@<provider> format
 
         Raises:
-            ValueError: For these violations:
+            PydanticCustomError: For these violations:
                 - Missing @ separator
                 - Empty model_name/provider
                 - Invalid component structure
@@ ... @@
             Invalid: "text-embedding-3-large@" (empty provider)
         """
         if "@" not in v:
-            raise ValueError("Embedding model identifier must follow <model_name>@<provider> format")
+            raise PydanticCustomError("format_invalid", "Embedding model identifier must follow <model_name>@<provider> format")
 
         components = v.split("@", 1)
         if len(components) != 2 or not all(components):
-            raise ValueError("Both model_name and provider must be non-empty strings")
+            raise PydanticCustomError("format_invalid", "Both model_name and provider must be non-empty strings")
 
         model_name, provider = components
         if not model_name.strip() or not provider.strip():
-            raise ValueError("Model name and provider cannot be whitespace-only strings")
+            raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings")
         return v
 
     @field_validator("permission", mode="before")
@@ ... @@
             ParserConfig | None: Validated configuration object
 
         Raises:
-            ValueError: When serialized JSON exceeds 65,535 characters
+            PydanticCustomError: When serialized JSON exceeds 65,535 characters
         """
         if v is None:
             return None
 
         if (json_str := v.model_dump_json()) and len(json_str) > 65535:
-            raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}")
+            raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)})
         return v
@@ ... @@
     @field_serializer("dataset_id")
     def serialize_uuid_to_hex(self, v: uuid.UUID) -> str:
+        """
+        Serializes a UUID version 1 object to its hexadecimal string representation.
+
+        This field serializer specifically handles UUID version 1 objects, converting them
+        to their canonical 32-character hexadecimal format without hyphens. The conversion
+        is designed for consistent serialization in API responses and database storage.
+
+        Args:
+            v (uuid.UUID1): The UUID version 1 object to serialize. Must be a valid
+                UUID1 instance generated by Python's uuid module.
+
+        Returns:
+            str: 32-character lowercase hexadecimal string representation
+                Example: "550e8400e29b41d4a716446655440000"
+
+        Raises:
+            AttributeError: If input is not a proper UUID object (missing hex attribute)
+            TypeError: If input is not a UUID1 instance (when type checking is enabled)
+
+        Notes:
+            - Version 1 UUIDs contain timestamp and MAC address information
+            - The .hex property automatically converts to lowercase hexadecimal
+            - For cross-version compatibility, consider typing as uuid.UUID instead
+        """
         return v.hex
 
 
+class DeleteReq(Base):
+    ids: list[UUID1] | None = Field(...)
+
+    @field_validator("ids", mode="after")
+    def check_duplicate_ids(cls, v: list[UUID1] | None) -> list[str] | None:
+        """
+        Validates and converts a list of UUID1 objects to hexadecimal strings while checking for duplicates.
+
+        This validator implements a three-stage processing pipeline:
+            1. Null Handling - returns None for empty/null input
+            2. UUID Conversion - transforms UUID objects to hex strings
+            3. Duplicate Validation - ensures all IDs are unique
+
+        Behavior Specifications:
+            - Input: None → Returns None (indicates no operation)
+            - Input: [] → Returns [] (empty list for explicit no-op)
+            - Input: [UUID1,...] → Returns validated hex strings
+            - Duplicates: Raises formatted PydanticCustomError
+
+        Args:
+            v (list[UUID1] | None):
+                - None: Indicates no datasets should be processed
+                - Empty list: Explicit empty operation
+                - Populated list: Dataset UUIDs to validate/convert
+
+        Returns:
+            list[str] | None:
+                - None when input is None
+                - List of 32-character hex strings (lowercase, no hyphens)
+                  Example: ["550e8400e29b41d4a716446655440000"]
+
+        Raises:
+            PydanticCustomError: When duplicates detected, containing:
+                - Error type: "duplicate_uuids"
+                - Template message: "Duplicate ids: '{duplicate_ids}'"
+                - Context: {"duplicate_ids": "id1, id2, ..."}
+
+        Example:
+            >>> validate([UUID("..."), UUID("...")])
+            ["2cdf0456e9a711ee8000000000000000", ...]
+
+            >>> validate([UUID("..."), UUID("...")])  # Duplicates
+            PydanticCustomError: Duplicate ids: '2cdf0456e9a711ee8000000000000000'
+        """
+        if not v:
+            return v
+
+        uuid_hex_list = [ids.hex for ids in v]
+        duplicates = [item for item, count in Counter(uuid_hex_list).items() if count > 1]
+
+        if duplicates:
+            duplicates_str = ", ".join(duplicates)
+            raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": duplicates_str})
+
+        return uuid_hex_list
+
+
+class DeleteDatasetReq(DeleteReq): ...
```
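
The new `DeleteReq` model is small enough to exercise on its own. A runnable sketch of its behavior (assumes pydantic v2; it mirrors the class above instead of importing it, and the `Base` parent is replaced by `BaseModel`):

```python
from collections import Counter

from pydantic import UUID1, BaseModel, Field, ValidationError, field_validator
from pydantic_core import PydanticCustomError


class DeleteReq(BaseModel):
    ids: list[UUID1] | None = Field(...)  # key is required, but may be null

    @field_validator("ids", mode="after")
    @classmethod
    def check_duplicate_ids(cls, v):
        if not v:
            return v  # None and [] pass through unchanged
        hex_ids = [u.hex for u in v]
        duplicates = [h for h, c in Counter(hex_ids).items() if c > 1]
        if duplicates:
            raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": ", ".join(duplicates)})
        return hex_ids  # UUID objects come out as 32-char hex strings


print(DeleteReq(ids=None).ids)  # None -> delete everything
print(DeleteReq(ids=[]).ids)    # []   -> delete nothing
print(DeleteReq(ids=["d94a8dc02c9711f0930f7fbc369eab6d"]).ids)
try:
    DeleteReq(ids=["d94a8dc02c9711f0930f7fbc369eab6d"] * 2)
except ValidationError as e:
    print(e)  # duplicate_uuids: Duplicate ids: '...'
```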

**docs/references/http_api_reference.md** (+7, -4)

````diff
 - `'content-Type: application/json'`
 - `'Authorization: Bearer <YOUR_API_KEY>'`
 - Body:
-  - `"ids"`: `list[string]`
+  - `"ids"`: `list[string]` or `null`
 
 ##### Request example
@@ ... @@
      --header 'Content-Type: application/json' \
      --header 'Authorization: Bearer <YOUR_API_KEY>' \
      --data '{
-     "ids": ["test_1", "test_2"]
+     "ids": ["d94a8dc02c9711f0930f7fbc369eab6d", "e94a8dc02c9711f0930f7fbc369eab6e"]
      }'
 ```
 
 ##### Request parameters
 
-- `"ids"`: (*Body parameter*), `list[string]`
-  The IDs of the datasets to delete. If it is not specified, all datasets will be deleted.
+- `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required*
+  Specifies the datasets to delete:
+  - If `null`, all datasets will be deleted.
+  - If an array of IDs, only the specified datasets will be deleted.
+  - If an empty array, no datasets will be deleted.
 
 #### Response
````
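Given the new `null` semantics, a delete-all request would look like the following (the method and URL follow the pattern of the surrounding reference and are not part of the excerpt above):

```bash
curl --request DELETE \
     --url http://{address}/api/v1/datasets \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '{"ids": null}'
```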



**docs/references/python_api_reference.md** (+7, -4)

````diff
 ### Delete datasets
 
 ```python
-RAGFlow.delete_datasets(ids: list[str] = None)
+RAGFlow.delete_datasets(ids: list[str] | None = None)
 ```
 
 Deletes datasets by ID.
 
 #### Parameters
 
-##### ids: `list[str]`, *Required*
+##### ids: `list[str]` or `None`, *Required*
 
-The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted.
+The IDs of the datasets to delete. Defaults to `None`.
+- If `None`, all datasets will be deleted.
+- If an array of IDs, only the specified datasets will be deleted.
+- If an empty array, no datasets will be deleted.
 
 #### Returns
@@ ... @@
 #### Examples
 
 ```python
-rag_object.delete_datasets(ids=["id_1","id_2"])
+rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"])
 ```
 
 ---
````
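
The `None` and empty-list cases documented above can be exercised the same way (hypothetical usage following the documented semantics):

```python
rag_object.delete_datasets(ids=None)  # deletes ALL datasets under this account
rag_object.delete_datasets(ids=[])    # explicit no-op: nothing is deleted
```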

**sdk/python/test/test_http_api/conftest.py** (+4, -5)

@pytest.fixture(scope="function") @pytest.fixture(scope="function")
def clear_datasets(request, get_http_api_auth): def clear_datasets(request, get_http_api_auth):
def cleanup(): def cleanup():
delete_datasets(get_http_api_auth)
delete_datasets(get_http_api_auth, {"ids": None})


request.addfinalizer(cleanup) request.addfinalizer(cleanup)


@pytest.fixture(scope="class") @pytest.fixture(scope="class")
def add_dataset(request, get_http_api_auth): def add_dataset(request, get_http_api_auth):
def cleanup(): def cleanup():
delete_datasets(get_http_api_auth)
delete_datasets(get_http_api_auth, {"ids": None})


request.addfinalizer(cleanup) request.addfinalizer(cleanup)


@pytest.fixture(scope="function") @pytest.fixture(scope="function")
def add_dataset_func(request, get_http_api_auth): def add_dataset_func(request, get_http_api_auth):
def cleanup(): def cleanup():
delete_datasets(get_http_api_auth)
delete_datasets(get_http_api_auth, {"ids": None})


request.addfinalizer(cleanup) request.addfinalizer(cleanup)


dataset_ids = batch_create_datasets(get_http_api_auth, 1)
return dataset_ids[0]
return batch_create_datasets(get_http_api_auth, 1)[0]




@pytest.fixture(scope="class") @pytest.fixture(scope="class")

**sdk/python/test/test_http_api/test_dataset_mangement/conftest.py** (+2, -12)

@pytest.fixture(scope="class") @pytest.fixture(scope="class")
def add_datasets(get_http_api_auth, request): def add_datasets(get_http_api_auth, request):
def cleanup(): def cleanup():
delete_datasets(get_http_api_auth)
delete_datasets(get_http_api_auth, {"ids": None})


request.addfinalizer(cleanup) request.addfinalizer(cleanup)


@pytest.fixture(scope="function") @pytest.fixture(scope="function")
def add_datasets_func(get_http_api_auth, request): def add_datasets_func(get_http_api_auth, request):
def cleanup(): def cleanup():
delete_datasets(get_http_api_auth)
delete_datasets(get_http_api_auth, {"ids": None})


request.addfinalizer(cleanup) request.addfinalizer(cleanup)


return batch_create_datasets(get_http_api_auth, 3) return batch_create_datasets(get_http_api_auth, 3)


@pytest.fixture(scope="function")
def add_dataset_func(get_http_api_auth, request):
def cleanup():
delete_datasets(get_http_api_auth)

request.addfinalizer(cleanup)

return batch_create_datasets(get_http_api_auth, 1)[0]

**sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py** (+134, -65)

```diff
 from libs.auth import RAGFlowHttpApiAuth
 
 
+@pytest.mark.p1
 class TestAuthorization:
-    @pytest.mark.p1
     @pytest.mark.parametrize(
         "auth, expected_code, expected_message",
         [
@@ ... @@
             ),
         ],
     )
-    def test_invalid_auth(self, auth, expected_code, expected_message):
+    def test_auth_invalid(self, auth, expected_code, expected_message):
         res = delete_datasets(auth)
         assert res["code"] == expected_code
         assert res["message"] == expected_message
 
 
-class TestDatasetsDeletion:
-    @pytest.mark.p1
-    @pytest.mark.parametrize(
-        "payload, expected_code, expected_message, remaining",
-        [
-            (None, 0, "", 0),
-            ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
-            (
-                {"ids": ["\n!?。;!?\"'"]},
-                102,
-                "You don't own the dataset \n!?。;!?\"'",
-                3,
-            ),
-            (
-                "not json",
-                100,
-                "AttributeError(\"'str' object has no attribute 'get'\")",
-                3,
-            ),
-            (lambda r: {"ids": r[:1]}, 0, "", 2),
-            (lambda r: {"ids": r}, 0, "", 0),
-        ],
-    )
-    def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == expected_code
-        if res["code"] != 0:
-            assert res["message"] == expected_message
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == remaining
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, get_http_api_auth):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
+    @pytest.mark.parametrize(
+        "payload, expected_message",
+        [
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
+        ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
+    )
+    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
+        res = delete_datasets(get_http_api_auth, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth):
+        res = delete_datasets(get_http_api_auth, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, get_http_api_auth):
+        ids = batch_create_datasets(get_http_api_auth, 1_000)
+        res = delete_datasets(get_http_api_auth, {"ids": ids})
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p3
+    def test_concurrent_deletion(self, get_http_api_auth):
+        dataset_num = 1_000
+        ids = batch_create_datasets(get_http_api_auth, dataset_num)
+
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
+        responses = [f.result() for f in futures]
+        assert all(r["code"] == 0 for r in responses), responses
+
+
+class TestDatasetsDelete:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "func, expected_code, expected_message, remaining",
+        [
+            (lambda r: {"ids": r[:1]}, 0, "", 2),
+            (lambda r: {"ids": r}, 0, "", 0),
+        ],
+        ids=["single_dataset", "multiple_datasets"],
+    )
+    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == expected_code, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == remaining, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, get_http_api_auth):
+        payload = {"ids": []}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, get_http_api_auth):
+        payload = {"ids": None}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, get_http_api_auth):
+        payload = {"ids": ["not_uuid"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid UUID" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, get_http_api_auth):
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
 
     @pytest.mark.p2
     @pytest.mark.parametrize(
-        "payload",
+        "func",
         [
-            lambda r: {"ids": ["invalid_id"] + r},
-            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
-            lambda r: {"ids": r + ["invalid_id"]},
+            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
+            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
+            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
         ],
     )
-    def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload):
+    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
         dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
+        if callable(func):
+            payload = func(dataset_ids)
         res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
 
         res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == 3, res
 
     @pytest.mark.p2
-    def test_repeated_deletion(self, get_http_api_auth, add_datasets_func):
+    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
         dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 0
-
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 102
-        assert "You don't own the dataset" in res["message"]
+        payload = {"ids": dataset_ids + dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Duplicate ids:" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
 
     @pytest.mark.p2
-    def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func):
+    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
         dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids})
-        assert res["code"] == 0
-        assert "Duplicate dataset ids" in res["data"]["errors"][0]
-        assert res["data"]["success_count"] == 3
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
-
-    @pytest.mark.p3
-    def test_concurrent_deletion(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 100)
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
-        responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
-
-    @pytest.mark.p3
-    def test_delete_10k(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 10_000)
-        res = delete_datasets(get_http_api_auth, {"ids": ids})
-        assert res["code"] == 0
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        payload = {"ids": dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, get_http_api_auth):
+        payload = {"unknown_field": "unknown_field"}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
```

**sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py** (+7, -0)

assert res["code"] == 101, res assert res["code"] == 101, res
assert res["message"] == "No properties were modified", res assert res["message"] == "No properties were modified", res


@pytest.mark.p3
def test_payload_unset(self, get_http_api_auth, add_dataset_func):
dataset_id = add_dataset_func
res = update_dataset(get_http_api_auth, dataset_id, None)
assert res["code"] == 101, res
assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res



class TestCapability: class TestCapability:
@pytest.mark.p3 @pytest.mark.p3
