### What problem does this PR solve?

- Update BaseModel to use model_config instead of Config class
- Replace StrEnum with Literal types for method fields
- Convert Field declarations to Annotated style

### Type of change

- [x] Refactoring

tags/v0.20.0
| @@ -14,14 +14,19 @@ | |||
| # limitations under the License. | |||
| # | |||
| from collections import Counter | |||
| from enum import auto | |||
| from typing import Annotated, Any | |||
| from typing import Annotated, Any, Literal | |||
| from uuid import UUID | |||
| from flask import Request | |||
| from pydantic import BaseModel, Field, StringConstraints, ValidationError, field_validator | |||
| from pydantic import ( | |||
| BaseModel, | |||
| ConfigDict, | |||
| Field, | |||
| StringConstraints, | |||
| ValidationError, | |||
| field_validator, | |||
| ) | |||
| from pydantic_core import PydanticCustomError | |||
| from strenum import StrEnum | |||
| from werkzeug.exceptions import BadRequest, UnsupportedMediaType | |||
| from api.constants import DATASET_NAME_LIMIT | |||
| @@ -307,38 +312,12 @@ def validate_uuid1_hex(v: Any) -> str: | |||
| raise PydanticCustomError("invalid_UUID1_format", "Invalid UUID1 format") | |||
| class PermissionEnum(StrEnum): | |||
| me = auto() | |||
| team = auto() | |||
| class ChunkMethodEnum(StrEnum): | |||
| naive = auto() | |||
| book = auto() | |||
| email = auto() | |||
| laws = auto() | |||
| manual = auto() | |||
| one = auto() | |||
| paper = auto() | |||
| picture = auto() | |||
| presentation = auto() | |||
| qa = auto() | |||
| table = auto() | |||
| tag = auto() | |||
| class GraphragMethodEnum(StrEnum): | |||
| light = auto() | |||
| general = auto() | |||
| class Base(BaseModel): | |||
| class Config: | |||
| extra = "forbid" | |||
| model_config = ConfigDict(extra="forbid", strict=True) | |||
| class RaptorConfig(Base): | |||
| use_raptor: bool = Field(default=False) | |||
| use_raptor: Annotated[bool, Field(default=False)] | |||
| prompt: Annotated[ | |||
| str, | |||
| StringConstraints(strip_whitespace=True, min_length=1), | |||
| @@ -346,46 +325,49 @@ class RaptorConfig(Base): | |||
| default="Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize." | |||
| ), | |||
| ] | |||
| max_token: int = Field(default=256, ge=1, le=2048) | |||
| threshold: float = Field(default=0.1, ge=0.0, le=1.0) | |||
| max_cluster: int = Field(default=64, ge=1, le=1024) | |||
| random_seed: int = Field(default=0, ge=0) | |||
| max_token: Annotated[int, Field(default=256, ge=1, le=2048)] | |||
| threshold: Annotated[float, Field(default=0.1, ge=0.0, le=1.0)] | |||
| max_cluster: Annotated[int, Field(default=64, ge=1, le=1024)] | |||
| random_seed: Annotated[int, Field(default=0, ge=0)] | |||
| class GraphragConfig(Base): | |||
| use_graphrag: bool = Field(default=False) | |||
| entity_types: list[str] = Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"]) | |||
| method: GraphragMethodEnum = Field(default=GraphragMethodEnum.light) | |||
| community: bool = Field(default=False) | |||
| resolution: bool = Field(default=False) | |||
| use_graphrag: Annotated[bool, Field(default=False)] | |||
| entity_types: Annotated[list[str], Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"])] | |||
| method: Annotated[Literal["light", "general"], Field(default="light")] | |||
| community: Annotated[bool, Field(default=False)] | |||
| resolution: Annotated[bool, Field(default=False)] | |||
| class ParserConfig(Base): | |||
| auto_keywords: int = Field(default=0, ge=0, le=32) | |||
| auto_questions: int = Field(default=0, ge=0, le=10) | |||
| chunk_token_num: int = Field(default=512, ge=1, le=2048) | |||
| delimiter: str = Field(default=r"\n", min_length=1) | |||
| graphrag: GraphragConfig = Field(default_factory=lambda: GraphragConfig(use_graphrag=False)) | |||
| html4excel: bool = False | |||
| layout_recognize: str = "DeepDOC" | |||
| raptor: RaptorConfig = Field(default_factory=lambda: RaptorConfig(use_raptor=False)) | |||
| tag_kb_ids: list[str] = Field(default_factory=list) | |||
| topn_tags: int = Field(default=1, ge=1, le=10) | |||
| filename_embd_weight: float | None = Field(default=0.1, ge=0.0, le=1.0) | |||
| task_page_size: int | None = Field(default=None, ge=1) | |||
| pages: list[list[int]] | None = None | |||
| auto_keywords: Annotated[int, Field(default=0, ge=0, le=32)] | |||
| auto_questions: Annotated[int, Field(default=0, ge=0, le=10)] | |||
| chunk_token_num: Annotated[int, Field(default=512, ge=1, le=2048)] | |||
| delimiter: Annotated[str, Field(default=r"\n", min_length=1)] | |||
| graphrag: Annotated[GraphragConfig, Field(default_factory=lambda: GraphragConfig(use_graphrag=False))] | |||
| html4excel: Annotated[bool, Field(default=False)] | |||
| layout_recognize: Annotated[str, Field(default="DeepDOC")] | |||
| raptor: Annotated[RaptorConfig, Field(default_factory=lambda: RaptorConfig(use_raptor=False))] | |||
| tag_kb_ids: Annotated[list[str], Field(default_factory=list)] | |||
| topn_tags: Annotated[int, Field(default=1, ge=1, le=10)] | |||
| filename_embd_weight: Annotated[float | None, Field(default=0.1, ge=0.0, le=1.0)] | |||
| task_page_size: Annotated[int | None, Field(default=None, ge=1)] | |||
| pages: Annotated[list[list[int]] | None, Field(default=None)] | |||
| class CreateDatasetReq(Base): | |||
| name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(...)] | |||
| avatar: str | None = Field(default=None, max_length=65535) | |||
| description: str | None = Field(default=None, max_length=65535) | |||
| embedding_model: str | None = Field(default=None, max_length=255, serialization_alias="embd_id") | |||
| permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16) | |||
| chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id") | |||
| parser_config: ParserConfig | None = Field(default=None) | |||
| @field_validator("avatar") | |||
| avatar: Annotated[str | None, Field(default=None, max_length=65535)] | |||
| description: Annotated[str | None, Field(default=None, max_length=65535)] | |||
| embedding_model: Annotated[str | None, Field(default=None, max_length=255, serialization_alias="embd_id")] | |||
| permission: Annotated[Literal["me", "team"], Field(default="me", min_length=1, max_length=16)] | |||
| chunk_method: Annotated[ | |||
| Literal["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"], | |||
| Field(default="naive", min_length=1, max_length=32, serialization_alias="parser_id"), | |||
| ] | |||
| parser_config: Annotated[ParserConfig | None, Field(default=None)] | |||
| @field_validator("avatar", mode="after") | |||
| @classmethod | |||
| def validate_avatar_base64(cls, v: str | None) -> str | None: | |||
| """ | |||
| @@ -438,6 +420,7 @@ class CreateDatasetReq(Base): | |||
| @field_validator("embedding_model", mode="before") | |||
| @classmethod | |||
| def normalize_embedding_model(cls, v: Any) -> Any: | |||
| """Normalize embedding model string by stripping whitespace""" | |||
| if isinstance(v, str): | |||
| return v.strip() | |||
| return v | |||
| @@ -484,10 +467,10 @@ class CreateDatasetReq(Base): | |||
| raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings") | |||
| return v | |||
| @field_validator("permission", mode="before") | |||
| @classmethod | |||
| def normalize_permission(cls, v: Any) -> Any: | |||
| return normalize_str(v) | |||
| # @field_validator("permission", mode="before") | |||
| # @classmethod | |||
| # def normalize_permission(cls, v: Any) -> Any: | |||
| # return normalize_str(v) | |||
| @field_validator("parser_config", mode="before") | |||
| @classmethod | |||
| @@ -544,9 +527,9 @@ class CreateDatasetReq(Base): | |||
| class UpdateDatasetReq(CreateDatasetReq): | |||
| dataset_id: str = Field(...) | |||
| dataset_id: Annotated[str, Field(...)] | |||
| name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(default="")] | |||
| pagerank: int = Field(default=0, ge=0, le=100) | |||
| pagerank: Annotated[int, Field(default=0, ge=0, le=100)] | |||
| @field_validator("dataset_id", mode="before") | |||
| @classmethod | |||
| @@ -555,7 +538,7 @@ class UpdateDatasetReq(CreateDatasetReq): | |||
| class DeleteReq(Base): | |||
| ids: list[str] | None = Field(...) | |||
| ids: Annotated[list[str] | None, Field(...)] | |||
| @field_validator("ids", mode="after") | |||
| @classmethod | |||
| @@ -634,28 +617,20 @@ class DeleteReq(Base): | |||
| class DeleteDatasetReq(DeleteReq): ... | |||
| class OrderByEnum(StrEnum): | |||
| create_time = auto() | |||
| update_time = auto() | |||
| class BaseListReq(BaseModel): | |||
| model_config = ConfigDict(extra="forbid") | |||
| class BaseListReq(Base): | |||
| id: str | None = None | |||
| name: str | None = None | |||
| page: int = Field(default=1, ge=1) | |||
| page_size: int = Field(default=30, ge=1) | |||
| orderby: OrderByEnum = Field(default=OrderByEnum.create_time) | |||
| desc: bool = Field(default=True) | |||
| id: Annotated[str | None, Field(default=None)] | |||
| name: Annotated[str | None, Field(default=None)] | |||
| page: Annotated[int, Field(default=1, ge=1)] | |||
| page_size: Annotated[int, Field(default=30, ge=1)] | |||
| orderby: Annotated[Literal["create_time", "update_time"], Field(default="create_time")] | |||
| desc: Annotated[bool, Field(default=True)] | |||
| @field_validator("id", mode="before") | |||
| @classmethod | |||
| def validate_id(cls, v: Any) -> str: | |||
| return validate_uuid1_hex(v) | |||
| @field_validator("orderby", mode="before") | |||
| @classmethod | |||
| def normalize_orderby(cls, v: Any) -> Any: | |||
| return normalize_str(v) | |||
| class ListDatasetReq(BaseListReq): ... | |||
| @@ -299,11 +299,8 @@ class TestDatasetCreate: | |||
| [ | |||
| ("me", "me"), | |||
| ("team", "team"), | |||
| ("me_upercase", "ME"), | |||
| ("team_upercase", "TEAM"), | |||
| ("whitespace", " ME "), | |||
| ], | |||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||
| ids=["me", "team"], | |||
| ) | |||
| def test_permission(self, HttpApiAuth, name, permission): | |||
| payload = {"name": name, "permission": permission} | |||
| @@ -318,8 +315,11 @@ class TestDatasetCreate: | |||
| ("empty", ""), | |||
| ("unknown", "unknown"), | |||
| ("type_error", list()), | |||
| ("me_upercase", "ME"), | |||
| ("team_upercase", "TEAM"), | |||
| ("whitespace", " ME "), | |||
| ], | |||
| ids=["empty", "unknown", "type_error"], | |||
| ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"], | |||
| ) | |||
| def test_permission_invalid(self, HttpApiAuth, name, permission): | |||
| payload = {"name": name, "permission": permission} | |||
| @@ -517,57 +517,57 @@ class TestDatasetCreate: | |||
| [ | |||
| ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"), | |||
| ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"), | |||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer"), | |||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer"), | |||
| ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"), | |||
| ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"), | |||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer"), | |||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer"), | |||
| ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | |||
| ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | |||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer"), | |||
| ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"), | |||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean"), | |||
| ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"), | |||
| ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | |||
| ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"), | |||
| ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"), | |||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer"), | |||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer"), | |||
| ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | |||
| ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | |||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number"), | |||
| ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), | |||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer"), | |||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer"), | |||
| ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), | |||
| ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"), | |||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||
| ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | |||
| ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | |||
| ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | |||
| ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | |||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||
| ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"), | |||
| ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"), | |||
| ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | |||
| ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | |||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||
| ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | |||
| ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | |||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||
| ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | |||
| ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | |||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||
| ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | |||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||
| ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | |||
| ], | |||
| ids=[ | |||
| @@ -148,11 +148,8 @@ class TestDatasetsList: | |||
| [ | |||
| ({"orderby": "create_time"}, lambda r: (is_sorted(r["data"], "create_time", True))), | |||
| ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||
| ({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))), | |||
| ({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||
| ({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||
| ], | |||
| ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||
| ids=["orderby_create_time", "orderby_update_time"], | |||
| ) | |||
| def test_orderby(self, HttpApiAuth, params, assertions): | |||
| res = list_datasets(HttpApiAuth, params) | |||
| @@ -166,8 +163,11 @@ class TestDatasetsList: | |||
| [ | |||
| {"orderby": ""}, | |||
| {"orderby": "unknown"}, | |||
| ({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))), | |||
| ({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||
| ({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||
| ], | |||
| ids=["empty", "unknown"], | |||
| ids=["empty", "unknown", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||
| ) | |||
| def test_orderby_invalid(self, HttpApiAuth, params): | |||
| res = list_datasets(HttpApiAuth, params) | |||
| @@ -337,11 +337,8 @@ class TestDatasetUpdate: | |||
| [ | |||
| "me", | |||
| "team", | |||
| "ME", | |||
| "TEAM", | |||
| " ME ", | |||
| ], | |||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||
| ids=["me", "team"], | |||
| ) | |||
| def test_permission(self, HttpApiAuth, add_dataset_func, permission): | |||
| dataset_id = add_dataset_func | |||
| @@ -360,8 +357,11 @@ class TestDatasetUpdate: | |||
| "", | |||
| "unknown", | |||
| list(), | |||
| "ME", | |||
| "TEAM", | |||
| " ME ", | |||
| ], | |||
| ids=["empty", "unknown", "type_error"], | |||
| ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"], | |||
| ) | |||
| def test_permission_invalid(self, HttpApiAuth, add_dataset_func, permission): | |||
| dataset_id = add_dataset_func | |||
| @@ -623,57 +623,57 @@ class TestDatasetUpdate: | |||
| [ | |||
| ({"auto_keywords": -1}, "Input should be greater than or equal to 0"), | |||
| ({"auto_keywords": 33}, "Input should be less than or equal to 32"), | |||
| ({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"auto_keywords": 3.14}, "Input should be a valid integer"), | |||
| ({"auto_keywords": "string"}, "Input should be a valid integer"), | |||
| ({"auto_questions": -1}, "Input should be greater than or equal to 0"), | |||
| ({"auto_questions": 11}, "Input should be less than or equal to 10"), | |||
| ({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"auto_questions": 3.14}, "Input should be a valid integer"), | |||
| ({"auto_questions": "string"}, "Input should be a valid integer"), | |||
| ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | |||
| ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | |||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||
| ({"chunk_token_num": "string"}, "Input should be a valid integer"), | |||
| ({"delimiter": ""}, "String should have at least 1 character"), | |||
| ({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"html4excel": "string"}, "Input should be a valid boolean"), | |||
| ({"tag_kb_ids": "1,2"}, "Input should be a valid list"), | |||
| ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | |||
| ({"topn_tags": 0}, "Input should be greater than or equal to 1"), | |||
| ({"topn_tags": 11}, "Input should be less than or equal to 10"), | |||
| ({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"topn_tags": 3.14}, "Input should be a valid integer"), | |||
| ({"topn_tags": "string"}, "Input should be a valid integer"), | |||
| ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | |||
| ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | |||
| ({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||
| ({"filename_embd_weight": "string"}, "Input should be a valid number"), | |||
| ({"task_page_size": 0}, "Input should be greater than or equal to 1"), | |||
| ({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"task_page_size": 3.14}, "Input should be a valid integer"), | |||
| ({"task_page_size": "string"}, "Input should be a valid integer"), | |||
| ({"pages": "1,2"}, "Input should be a valid list"), | |||
| ({"pages": ["1,2"]}, "Input should be a valid list"), | |||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||
| ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | |||
| ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | |||
| ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | |||
| ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | |||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||
| ({"raptor": {"prompt": ""}}, "String should have at least 1 character"), | |||
| ({"raptor": {"prompt": " "}}, "String should have at least 1 character"), | |||
| ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | |||
| ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | |||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||
| ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | |||
| ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | |||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||
| ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | |||
| ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | |||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||
| ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | |||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||
| ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | |||
| ], | |||
| ids=[ | |||
| @@ -254,11 +254,8 @@ class TestDatasetCreate: | |||
| [ | |||
| ("me", "me"), | |||
| ("team", "team"), | |||
| ("me_upercase", "ME"), | |||
| ("team_upercase", "TEAM"), | |||
| ("whitespace", " ME "), | |||
| ], | |||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||
| ids=["me", "team"], | |||
| ) | |||
| def test_permission(self, client, name, permission): | |||
| payload = {"name": name, "permission": permission} | |||
| @@ -271,8 +268,11 @@ class TestDatasetCreate: | |||
| [ | |||
| ("empty", ""), | |||
| ("unknown", "unknown"), | |||
| ("me_upercase", "ME"), | |||
| ("team_upercase", "TEAM"), | |||
| ("whitespace", " ME "), | |||
| ], | |||
| ids=["empty", "unknown"], | |||
| ids=["empty", "unknown", "me_upercase", "team_upercase", "whitespace"], | |||
| ) | |||
| def test_permission_invalid(self, client, name, permission): | |||
| payload = {"name": name, "permission": permission} | |||
| @@ -466,57 +466,57 @@ class TestDatasetCreate: | |||
| [ | |||
| ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"), | |||
| ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"), | |||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer"), | |||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer"), | |||
| ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"), | |||
| ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"), | |||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer"), | |||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer"), | |||
| ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | |||
| ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | |||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer"), | |||
| ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"), | |||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean"), | |||
| ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"), | |||
| ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | |||
| ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"), | |||
| ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"), | |||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer"), | |||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer"), | |||
| ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | |||
| ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | |||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number"), | |||
| ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), | |||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer"), | |||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer"), | |||
| ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), | |||
| ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"), | |||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||
| ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | |||
| ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | |||
| ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | |||
| ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | |||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||
| ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"), | |||
| ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"), | |||
| ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | |||
| ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | |||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||
| ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | |||
| ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | |||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||
| ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | |||
| ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | |||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||
| ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | |||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||
| ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | |||
| ], | |||
| ids=[ | |||
| @@ -141,11 +141,8 @@ class TestDatasetsList: | |||
| [ | |||
| {"orderby": "create_time"}, | |||
| {"orderby": "update_time"}, | |||
| {"orderby": "CREATE_TIME"}, | |||
| {"orderby": "UPDATE_TIME"}, | |||
| {"orderby": " create_time "}, | |||
| ], | |||
| ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||
| ids=["orderby_create_time", "orderby_update_time"], | |||
| ) | |||
| def test_orderby(self, client, params): | |||
| client.list_datasets(**params) | |||
| @@ -156,8 +153,11 @@ class TestDatasetsList: | |||
| [ | |||
| {"orderby": ""}, | |||
| {"orderby": "unknown"}, | |||
| {"orderby": "CREATE_TIME"}, | |||
| {"orderby": "UPDATE_TIME"}, | |||
| {"orderby": " create_time "}, | |||
| ], | |||
| ids=["empty", "unknown"], | |||
| ids=["empty", "unknown", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||
| ) | |||
| def test_orderby_invalid(self, client, params): | |||
| with pytest.raises(Exception) as excinfo: | |||
| @@ -242,11 +242,8 @@ class TestDatasetUpdate: | |||
| [ | |||
| "me", | |||
| "team", | |||
| "ME", | |||
| "TEAM", | |||
| " ME ", | |||
| ], | |||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||
| ids=["me", "team"], | |||
| ) | |||
| def test_permission(self, client, add_dataset_func, permission): | |||
| dataset = add_dataset_func | |||
| @@ -263,8 +260,11 @@ class TestDatasetUpdate: | |||
| "", | |||
| "unknown", | |||
| list(), | |||
| "ME", | |||
| "TEAM", | |||
| " ME ", | |||
| ], | |||
| ids=["empty", "unknown", "type_error"], | |||
| ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"], | |||
| ) | |||
| def test_permission_invalid(self, add_dataset_func, permission): | |||
| dataset = add_dataset_func | |||
| @@ -514,57 +514,57 @@ class TestDatasetUpdate: | |||
| [ | |||
| ({"auto_keywords": -1}, "Input should be greater than or equal to 0"), | |||
| ({"auto_keywords": 33}, "Input should be less than or equal to 32"), | |||
| ({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"auto_keywords": 3.14}, "Input should be a valid integer"), | |||
| ({"auto_keywords": "string"}, "Input should be a valid integer"), | |||
| ({"auto_questions": -1}, "Input should be greater than or equal to 0"), | |||
| ({"auto_questions": 11}, "Input should be less than or equal to 10"), | |||
| ({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"auto_questions": 3.14}, "Input should be a valid integer"), | |||
| ({"auto_questions": "string"}, "Input should be a valid integer"), | |||
| ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | |||
| ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | |||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||
| ({"chunk_token_num": "string"}, "Input should be a valid integer"), | |||
| ({"delimiter": ""}, "String should have at least 1 character"), | |||
| ({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"html4excel": "string"}, "Input should be a valid boolean"), | |||
| ({"tag_kb_ids": "1,2"}, "Input should be a valid list"), | |||
| ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | |||
| ({"topn_tags": 0}, "Input should be greater than or equal to 1"), | |||
| ({"topn_tags": 11}, "Input should be less than or equal to 10"), | |||
| ({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"topn_tags": 3.14}, "Input should be a valid integer"), | |||
| ({"topn_tags": "string"}, "Input should be a valid integer"), | |||
| ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | |||
| ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | |||
| ({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||
| ({"filename_embd_weight": "string"}, "Input should be a valid number"), | |||
| ({"task_page_size": 0}, "Input should be greater than or equal to 1"), | |||
| ({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"task_page_size": 3.14}, "Input should be a valid integer"), | |||
| ({"task_page_size": "string"}, "Input should be a valid integer"), | |||
| ({"pages": "1,2"}, "Input should be a valid list"), | |||
| ({"pages": ["1,2"]}, "Input should be a valid list"), | |||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||
| ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | |||
| ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | |||
| ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | |||
| ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | |||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||
| ({"raptor": {"prompt": ""}}, "String should have at least 1 character"), | |||
| ({"raptor": {"prompt": " "}}, "String should have at least 1 character"), | |||
| ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | |||
| ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | |||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||
| ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | |||
| ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | |||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||
| ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | |||
| ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | |||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||
| ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | |||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||
| ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | |||
| ], | |||
| ids=[ | |||