Преглед изворни кода

Refa: update validation utils to use Pydantic v2 style models (#9037)

### What problem does this PR solve?

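- Update the shared `Base` model to use `model_config = ConfigDict(...)` instead of a nested `Config` class
- Replace the `StrEnum` classes with `Literal` types for the enumerated fields (`permission`, `chunk_method`, GraphRAG `method`, `orderby`)
- Convert `Field(...)` declarations to the `Annotated[...]` style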

### Type of change

- [x] Refactoring
tags/v0.20.0
Liu An пре 3 месеци
родитељ
комит
b5ffca332a
No account linked to committer's email address

+ 60
- 85
api/utils/validation_utils.py Прегледај датотеку

@@ -14,14 +14,19 @@
# limitations under the License.
#
from collections import Counter
from enum import auto
from typing import Annotated, Any
from typing import Annotated, Any, Literal
from uuid import UUID

from flask import Request
from pydantic import BaseModel, Field, StringConstraints, ValidationError, field_validator
from pydantic import (
BaseModel,
ConfigDict,
Field,
StringConstraints,
ValidationError,
field_validator,
)
from pydantic_core import PydanticCustomError
from strenum import StrEnum
from werkzeug.exceptions import BadRequest, UnsupportedMediaType

from api.constants import DATASET_NAME_LIMIT
@@ -307,38 +312,12 @@ def validate_uuid1_hex(v: Any) -> str:
raise PydanticCustomError("invalid_UUID1_format", "Invalid UUID1 format")


class PermissionEnum(StrEnum):
me = auto()
team = auto()


class ChunkMethodEnum(StrEnum):
naive = auto()
book = auto()
email = auto()
laws = auto()
manual = auto()
one = auto()
paper = auto()
picture = auto()
presentation = auto()
qa = auto()
table = auto()
tag = auto()


class GraphragMethodEnum(StrEnum):
light = auto()
general = auto()


class Base(BaseModel):
class Config:
extra = "forbid"
model_config = ConfigDict(extra="forbid", strict=True)


class RaptorConfig(Base):
use_raptor: bool = Field(default=False)
use_raptor: Annotated[bool, Field(default=False)]
prompt: Annotated[
str,
StringConstraints(strip_whitespace=True, min_length=1),
@@ -346,46 +325,49 @@ class RaptorConfig(Base):
default="Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize."
),
]
max_token: int = Field(default=256, ge=1, le=2048)
threshold: float = Field(default=0.1, ge=0.0, le=1.0)
max_cluster: int = Field(default=64, ge=1, le=1024)
random_seed: int = Field(default=0, ge=0)
max_token: Annotated[int, Field(default=256, ge=1, le=2048)]
threshold: Annotated[float, Field(default=0.1, ge=0.0, le=1.0)]
max_cluster: Annotated[int, Field(default=64, ge=1, le=1024)]
random_seed: Annotated[int, Field(default=0, ge=0)]


class GraphragConfig(Base):
use_graphrag: bool = Field(default=False)
entity_types: list[str] = Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"])
method: GraphragMethodEnum = Field(default=GraphragMethodEnum.light)
community: bool = Field(default=False)
resolution: bool = Field(default=False)
use_graphrag: Annotated[bool, Field(default=False)]
entity_types: Annotated[list[str], Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"])]
method: Annotated[Literal["light", "general"], Field(default="light")]
community: Annotated[bool, Field(default=False)]
resolution: Annotated[bool, Field(default=False)]


class ParserConfig(Base):
auto_keywords: int = Field(default=0, ge=0, le=32)
auto_questions: int = Field(default=0, ge=0, le=10)
chunk_token_num: int = Field(default=512, ge=1, le=2048)
delimiter: str = Field(default=r"\n", min_length=1)
graphrag: GraphragConfig = Field(default_factory=lambda: GraphragConfig(use_graphrag=False))
html4excel: bool = False
layout_recognize: str = "DeepDOC"
raptor: RaptorConfig = Field(default_factory=lambda: RaptorConfig(use_raptor=False))
tag_kb_ids: list[str] = Field(default_factory=list)
topn_tags: int = Field(default=1, ge=1, le=10)
filename_embd_weight: float | None = Field(default=0.1, ge=0.0, le=1.0)
task_page_size: int | None = Field(default=None, ge=1)
pages: list[list[int]] | None = None
auto_keywords: Annotated[int, Field(default=0, ge=0, le=32)]
auto_questions: Annotated[int, Field(default=0, ge=0, le=10)]
chunk_token_num: Annotated[int, Field(default=512, ge=1, le=2048)]
delimiter: Annotated[str, Field(default=r"\n", min_length=1)]
graphrag: Annotated[GraphragConfig, Field(default_factory=lambda: GraphragConfig(use_graphrag=False))]
html4excel: Annotated[bool, Field(default=False)]
layout_recognize: Annotated[str, Field(default="DeepDOC")]
raptor: Annotated[RaptorConfig, Field(default_factory=lambda: RaptorConfig(use_raptor=False))]
tag_kb_ids: Annotated[list[str], Field(default_factory=list)]
topn_tags: Annotated[int, Field(default=1, ge=1, le=10)]
filename_embd_weight: Annotated[float | None, Field(default=0.1, ge=0.0, le=1.0)]
task_page_size: Annotated[int | None, Field(default=None, ge=1)]
pages: Annotated[list[list[int]] | None, Field(default=None)]


class CreateDatasetReq(Base):
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(...)]
avatar: str | None = Field(default=None, max_length=65535)
description: str | None = Field(default=None, max_length=65535)
embedding_model: str | None = Field(default=None, max_length=255, serialization_alias="embd_id")
permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16)
chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
parser_config: ParserConfig | None = Field(default=None)

@field_validator("avatar")
avatar: Annotated[str | None, Field(default=None, max_length=65535)]
description: Annotated[str | None, Field(default=None, max_length=65535)]
embedding_model: Annotated[str | None, Field(default=None, max_length=255, serialization_alias="embd_id")]
permission: Annotated[Literal["me", "team"], Field(default="me", min_length=1, max_length=16)]
chunk_method: Annotated[
Literal["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
Field(default="naive", min_length=1, max_length=32, serialization_alias="parser_id"),
]
parser_config: Annotated[ParserConfig | None, Field(default=None)]

@field_validator("avatar", mode="after")
@classmethod
def validate_avatar_base64(cls, v: str | None) -> str | None:
"""
@@ -438,6 +420,7 @@ class CreateDatasetReq(Base):
@field_validator("embedding_model", mode="before")
@classmethod
def normalize_embedding_model(cls, v: Any) -> Any:
"""Normalize embedding model string by stripping whitespace"""
if isinstance(v, str):
return v.strip()
return v
@@ -484,10 +467,10 @@ class CreateDatasetReq(Base):
raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings")
return v

@field_validator("permission", mode="before")
@classmethod
def normalize_permission(cls, v: Any) -> Any:
return normalize_str(v)
# @field_validator("permission", mode="before")
# @classmethod
# def normalize_permission(cls, v: Any) -> Any:
# return normalize_str(v)

@field_validator("parser_config", mode="before")
@classmethod
@@ -544,9 +527,9 @@ class CreateDatasetReq(Base):


class UpdateDatasetReq(CreateDatasetReq):
dataset_id: str = Field(...)
dataset_id: Annotated[str, Field(...)]
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(default="")]
pagerank: int = Field(default=0, ge=0, le=100)
pagerank: Annotated[int, Field(default=0, ge=0, le=100)]

@field_validator("dataset_id", mode="before")
@classmethod
@@ -555,7 +538,7 @@ class UpdateDatasetReq(CreateDatasetReq):


class DeleteReq(Base):
ids: list[str] | None = Field(...)
ids: Annotated[list[str] | None, Field(...)]

@field_validator("ids", mode="after")
@classmethod
@@ -634,28 +617,20 @@ class DeleteReq(Base):
class DeleteDatasetReq(DeleteReq): ...


class OrderByEnum(StrEnum):
create_time = auto()
update_time = auto()
class BaseListReq(BaseModel):
model_config = ConfigDict(extra="forbid")


class BaseListReq(Base):
id: str | None = None
name: str | None = None
page: int = Field(default=1, ge=1)
page_size: int = Field(default=30, ge=1)
orderby: OrderByEnum = Field(default=OrderByEnum.create_time)
desc: bool = Field(default=True)
id: Annotated[str | None, Field(default=None)]
name: Annotated[str | None, Field(default=None)]
page: Annotated[int, Field(default=1, ge=1)]
page_size: Annotated[int, Field(default=30, ge=1)]
orderby: Annotated[Literal["create_time", "update_time"], Field(default="create_time")]
desc: Annotated[bool, Field(default=True)]

@field_validator("id", mode="before")
@classmethod
def validate_id(cls, v: Any) -> str:
return validate_uuid1_hex(v)

@field_validator("orderby", mode="before")
@classmethod
def normalize_orderby(cls, v: Any) -> Any:
return normalize_str(v)


class ListDatasetReq(BaseListReq): ...

+ 29
- 29
test/testcases/test_http_api/test_dataset_mangement/test_create_dataset.py Прегледај датотеку

@@ -299,11 +299,8 @@ class TestDatasetCreate:
[
("me", "me"),
("team", "team"),
("me_upercase", "ME"),
("team_upercase", "TEAM"),
("whitespace", " ME "),
],
ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
ids=["me", "team"],
)
def test_permission(self, HttpApiAuth, name, permission):
payload = {"name": name, "permission": permission}
@@ -318,8 +315,11 @@ class TestDatasetCreate:
("empty", ""),
("unknown", "unknown"),
("type_error", list()),
("me_upercase", "ME"),
("team_upercase", "TEAM"),
("whitespace", " ME "),
],
ids=["empty", "unknown", "type_error"],
ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"],
)
def test_permission_invalid(self, HttpApiAuth, name, permission):
payload = {"name": name, "permission": permission}
@@ -517,57 +517,57 @@ class TestDatasetCreate:
[
("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"),
("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"),
("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer"),
("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer"),
("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"),
("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"),
("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer"),
("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer"),
("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer"),
("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer"),
("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"),
("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean"),
("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"),
("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"),
("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"),
("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer"),
("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer"),
("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number"),
("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"),
("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer"),
("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer"),
("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"),
("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"),
("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer"),
("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"),
("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"),
("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean"),
("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"),
("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"),
("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"),
("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"),
("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer"),
("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer"),
("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number"),
("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"),
("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"),
("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"),
("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer"),
("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
],
ids=[

+ 5
- 5
test/testcases/test_http_api/test_dataset_mangement/test_list_datasets.py Прегледај датотеку

@@ -148,11 +148,8 @@ class TestDatasetsList:
[
({"orderby": "create_time"}, lambda r: (is_sorted(r["data"], "create_time", True))),
({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))),
({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))),
({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))),
({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))),
],
ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
ids=["orderby_create_time", "orderby_update_time"],
)
def test_orderby(self, HttpApiAuth, params, assertions):
res = list_datasets(HttpApiAuth, params)
@@ -166,8 +163,11 @@ class TestDatasetsList:
[
{"orderby": ""},
{"orderby": "unknown"},
({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))),
({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))),
({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))),
],
ids=["empty", "unknown"],
ids=["empty", "unknown", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
)
def test_orderby_invalid(self, HttpApiAuth, params):
res = list_datasets(HttpApiAuth, params)

+ 29
- 29
test/testcases/test_http_api/test_dataset_mangement/test_update_dataset.py Прегледај датотеку

@@ -337,11 +337,8 @@ class TestDatasetUpdate:
[
"me",
"team",
"ME",
"TEAM",
" ME ",
],
ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
ids=["me", "team"],
)
def test_permission(self, HttpApiAuth, add_dataset_func, permission):
dataset_id = add_dataset_func
@@ -360,8 +357,11 @@ class TestDatasetUpdate:
"",
"unknown",
list(),
"ME",
"TEAM",
" ME ",
],
ids=["empty", "unknown", "type_error"],
ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"],
)
def test_permission_invalid(self, HttpApiAuth, add_dataset_func, permission):
dataset_id = add_dataset_func
@@ -623,57 +623,57 @@ class TestDatasetUpdate:
[
({"auto_keywords": -1}, "Input should be greater than or equal to 0"),
({"auto_keywords": 33}, "Input should be less than or equal to 32"),
({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"auto_keywords": 3.14}, "Input should be a valid integer"),
({"auto_keywords": "string"}, "Input should be a valid integer"),
({"auto_questions": -1}, "Input should be greater than or equal to 0"),
({"auto_questions": 11}, "Input should be less than or equal to 10"),
({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"auto_questions": 3.14}, "Input should be a valid integer"),
({"auto_questions": "string"}, "Input should be a valid integer"),
({"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"chunk_token_num": 3.14}, "Input should be a valid integer"),
({"chunk_token_num": "string"}, "Input should be a valid integer"),
({"delimiter": ""}, "String should have at least 1 character"),
({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
({"html4excel": "string"}, "Input should be a valid boolean"),
({"tag_kb_ids": "1,2"}, "Input should be a valid list"),
({"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
({"topn_tags": 0}, "Input should be greater than or equal to 1"),
({"topn_tags": 11}, "Input should be less than or equal to 10"),
({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"topn_tags": 3.14}, "Input should be a valid integer"),
({"topn_tags": "string"}, "Input should be a valid integer"),
({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
({"filename_embd_weight": "string"}, "Input should be a valid number"),
({"task_page_size": 0}, "Input should be greater than or equal to 1"),
({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"task_page_size": 3.14}, "Input should be a valid integer"),
({"task_page_size": "string"}, "Input should be a valid integer"),
({"pages": "1,2"}, "Input should be a valid list"),
({"pages": ["1,2"]}, "Input should be a valid list"),
({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"pages": [["string1", "string2"]]}, "Input should be a valid integer"),
({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"),
({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"),
({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"graphrag": {"community": "string"}}, "Input should be a valid boolean"),
({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"),
({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"),
({"raptor": {"prompt": ""}}, "String should have at least 1 character"),
({"raptor": {"prompt": " "}}, "String should have at least 1 character"),
({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
({"raptor": {"max_token": 3.14}}, "Input should be a valid integer"),
({"raptor": {"max_token": "string"}}, "Input should be a valid integer"),
({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
({"raptor": {"threshold": "string"}}, "Input should be a valid number"),
({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"),
({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"),
({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"),
({"raptor": {"random_seed": "string"}}, "Input should be a valid integer"),
({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
],
ids=[

+ 29
- 29
test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py Прегледај датотеку

@@ -254,11 +254,8 @@ class TestDatasetCreate:
[
("me", "me"),
("team", "team"),
("me_upercase", "ME"),
("team_upercase", "TEAM"),
("whitespace", " ME "),
],
ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
ids=["me", "team"],
)
def test_permission(self, client, name, permission):
payload = {"name": name, "permission": permission}
@@ -271,8 +268,11 @@ class TestDatasetCreate:
[
("empty", ""),
("unknown", "unknown"),
("me_upercase", "ME"),
("team_upercase", "TEAM"),
("whitespace", " ME "),
],
ids=["empty", "unknown"],
ids=["empty", "unknown", "me_upercase", "team_upercase", "whitespace"],
)
def test_permission_invalid(self, client, name, permission):
payload = {"name": name, "permission": permission}
@@ -466,57 +466,57 @@ class TestDatasetCreate:
[
("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"),
("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"),
("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer"),
("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer"),
("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"),
("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"),
("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer"),
("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer"),
("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer"),
("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer"),
("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"),
("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean"),
("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"),
("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"),
("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"),
("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer"),
("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer"),
("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number"),
("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"),
("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer"),
("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer"),
("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"),
("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"),
("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer"),
("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"),
("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "Input should be a valid string"),
("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean"),
("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"),
("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"),
("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"),
("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"),
("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer"),
("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer"),
("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number"),
("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"),
("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"),
("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"),
("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer"),
("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
],
ids=[

+ 5
- 5
test/testcases/test_sdk_api/test_dataset_mangement/test_list_datasets.py Прегледај датотеку

@@ -141,11 +141,8 @@ class TestDatasetsList:
[
{"orderby": "create_time"},
{"orderby": "update_time"},
{"orderby": "CREATE_TIME"},
{"orderby": "UPDATE_TIME"},
{"orderby": " create_time "},
],
ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
ids=["orderby_create_time", "orderby_update_time"],
)
def test_orderby(self, client, params):
client.list_datasets(**params)
@@ -156,8 +153,11 @@ class TestDatasetsList:
[
{"orderby": ""},
{"orderby": "unknown"},
{"orderby": "CREATE_TIME"},
{"orderby": "UPDATE_TIME"},
{"orderby": " create_time "},
],
ids=["empty", "unknown"],
ids=["empty", "unknown", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
)
def test_orderby_invalid(self, client, params):
with pytest.raises(Exception) as excinfo:

+ 29
- 29
test/testcases/test_sdk_api/test_dataset_mangement/test_update_dataset.py Прегледај датотеку

@@ -242,11 +242,8 @@ class TestDatasetUpdate:
[
"me",
"team",
"ME",
"TEAM",
" ME ",
],
ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
ids=["me", "team"],
)
def test_permission(self, client, add_dataset_func, permission):
dataset = add_dataset_func
@@ -263,8 +260,11 @@ class TestDatasetUpdate:
"",
"unknown",
list(),
"ME",
"TEAM",
" ME ",
],
ids=["empty", "unknown", "type_error"],
ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"],
)
def test_permission_invalid(self, add_dataset_func, permission):
dataset = add_dataset_func
@@ -514,57 +514,57 @@ class TestDatasetUpdate:
[
({"auto_keywords": -1}, "Input should be greater than or equal to 0"),
({"auto_keywords": 33}, "Input should be less than or equal to 32"),
({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"auto_keywords": 3.14}, "Input should be a valid integer"),
({"auto_keywords": "string"}, "Input should be a valid integer"),
({"auto_questions": -1}, "Input should be greater than or equal to 0"),
({"auto_questions": 11}, "Input should be less than or equal to 10"),
({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"auto_questions": 3.14}, "Input should be a valid integer"),
({"auto_questions": "string"}, "Input should be a valid integer"),
({"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"chunk_token_num": 3.14}, "Input should be a valid integer"),
({"chunk_token_num": "string"}, "Input should be a valid integer"),
({"delimiter": ""}, "String should have at least 1 character"),
({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
({"html4excel": "string"}, "Input should be a valid boolean"),
({"tag_kb_ids": "1,2"}, "Input should be a valid list"),
({"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
({"topn_tags": 0}, "Input should be greater than or equal to 1"),
({"topn_tags": 11}, "Input should be less than or equal to 10"),
({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"topn_tags": 3.14}, "Input should be a valid integer"),
({"topn_tags": "string"}, "Input should be a valid integer"),
({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
({"filename_embd_weight": "string"}, "Input should be a valid number"),
({"task_page_size": 0}, "Input should be greater than or equal to 1"),
({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
({"task_page_size": 3.14}, "Input should be a valid integer"),
({"task_page_size": "string"}, "Input should be a valid integer"),
({"pages": "1,2"}, "Input should be a valid list"),
({"pages": ["1,2"]}, "Input should be a valid list"),
({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"pages": [["string1", "string2"]]}, "Input should be a valid integer"),
({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"),
({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
({"graphrag": {"entity_types": [1, 2]}}, "Input should be a valid string"),
({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
({"graphrag": {"community": "string"}}, "Input should be a valid boolean"),
({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"),
({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"),
({"raptor": {"prompt": ""}}, "String should have at least 1 character"),
({"raptor": {"prompt": " "}}, "String should have at least 1 character"),
({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
({"raptor": {"max_token": 3.14}}, "Input should be a valid integer"),
({"raptor": {"max_token": "string"}}, "Input should be a valid integer"),
({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
({"raptor": {"threshold": "string"}}, "Input should be a valid number"),
({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"),
({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"),
({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"),
({"raptor": {"random_seed": "string"}}, "Input should be a valid integer"),
({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
],
ids=[

Loading…
Откажи
Сачувај