### What problem does this PR solve?

- Update BaseModel to use `model_config` instead of the `Config` class
- Replace StrEnum with Literal types for method fields
- Convert Field declarations to Annotated style

### Type of change

- [x] Refactoring

tags/v0.20.0
| # limitations under the License. | # limitations under the License. | ||||
| # | # | ||||
| from collections import Counter | from collections import Counter | ||||
| from enum import auto | |||||
| from typing import Annotated, Any | |||||
| from typing import Annotated, Any, Literal | |||||
| from uuid import UUID | from uuid import UUID | ||||
| from flask import Request | from flask import Request | ||||
| from pydantic import BaseModel, Field, StringConstraints, ValidationError, field_validator | |||||
| from pydantic import ( | |||||
| BaseModel, | |||||
| ConfigDict, | |||||
| Field, | |||||
| StringConstraints, | |||||
| ValidationError, | |||||
| field_validator, | |||||
| ) | |||||
| from pydantic_core import PydanticCustomError | from pydantic_core import PydanticCustomError | ||||
| from strenum import StrEnum | |||||
| from werkzeug.exceptions import BadRequest, UnsupportedMediaType | from werkzeug.exceptions import BadRequest, UnsupportedMediaType | ||||
| from api.constants import DATASET_NAME_LIMIT | from api.constants import DATASET_NAME_LIMIT | ||||
| raise PydanticCustomError("invalid_UUID1_format", "Invalid UUID1 format") | raise PydanticCustomError("invalid_UUID1_format", "Invalid UUID1 format") | ||||
| class PermissionEnum(StrEnum): | |||||
| me = auto() | |||||
| team = auto() | |||||
| class ChunkMethodEnum(StrEnum): | |||||
| naive = auto() | |||||
| book = auto() | |||||
| email = auto() | |||||
| laws = auto() | |||||
| manual = auto() | |||||
| one = auto() | |||||
| paper = auto() | |||||
| picture = auto() | |||||
| presentation = auto() | |||||
| qa = auto() | |||||
| table = auto() | |||||
| tag = auto() | |||||
| class GraphragMethodEnum(StrEnum): | |||||
| light = auto() | |||||
| general = auto() | |||||
| class Base(BaseModel): | class Base(BaseModel): | ||||
| class Config: | |||||
| extra = "forbid" | |||||
| model_config = ConfigDict(extra="forbid", strict=True) | |||||
| class RaptorConfig(Base): | class RaptorConfig(Base): | ||||
| use_raptor: bool = Field(default=False) | |||||
| use_raptor: Annotated[bool, Field(default=False)] | |||||
| prompt: Annotated[ | prompt: Annotated[ | ||||
| str, | str, | ||||
| StringConstraints(strip_whitespace=True, min_length=1), | StringConstraints(strip_whitespace=True, min_length=1), | ||||
| default="Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize." | default="Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize." | ||||
| ), | ), | ||||
| ] | ] | ||||
| max_token: int = Field(default=256, ge=1, le=2048) | |||||
| threshold: float = Field(default=0.1, ge=0.0, le=1.0) | |||||
| max_cluster: int = Field(default=64, ge=1, le=1024) | |||||
| random_seed: int = Field(default=0, ge=0) | |||||
| max_token: Annotated[int, Field(default=256, ge=1, le=2048)] | |||||
| threshold: Annotated[float, Field(default=0.1, ge=0.0, le=1.0)] | |||||
| max_cluster: Annotated[int, Field(default=64, ge=1, le=1024)] | |||||
| random_seed: Annotated[int, Field(default=0, ge=0)] | |||||
| class GraphragConfig(Base): | class GraphragConfig(Base): | ||||
| use_graphrag: bool = Field(default=False) | |||||
| entity_types: list[str] = Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"]) | |||||
| method: GraphragMethodEnum = Field(default=GraphragMethodEnum.light) | |||||
| community: bool = Field(default=False) | |||||
| resolution: bool = Field(default=False) | |||||
| use_graphrag: Annotated[bool, Field(default=False)] | |||||
| entity_types: Annotated[list[str], Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"])] | |||||
| method: Annotated[Literal["light", "general"], Field(default="light")] | |||||
| community: Annotated[bool, Field(default=False)] | |||||
| resolution: Annotated[bool, Field(default=False)] | |||||
| class ParserConfig(Base): | class ParserConfig(Base): | ||||
| auto_keywords: int = Field(default=0, ge=0, le=32) | |||||
| auto_questions: int = Field(default=0, ge=0, le=10) | |||||
| chunk_token_num: int = Field(default=512, ge=1, le=2048) | |||||
| delimiter: str = Field(default=r"\n", min_length=1) | |||||
| graphrag: GraphragConfig = Field(default_factory=lambda: GraphragConfig(use_graphrag=False)) | |||||
| html4excel: bool = False | |||||
| layout_recognize: str = "DeepDOC" | |||||
| raptor: RaptorConfig = Field(default_factory=lambda: RaptorConfig(use_raptor=False)) | |||||
| tag_kb_ids: list[str] = Field(default_factory=list) | |||||
| topn_tags: int = Field(default=1, ge=1, le=10) | |||||
| filename_embd_weight: float | None = Field(default=0.1, ge=0.0, le=1.0) | |||||
| task_page_size: int | None = Field(default=None, ge=1) | |||||
| pages: list[list[int]] | None = None | |||||
| auto_keywords: Annotated[int, Field(default=0, ge=0, le=32)] | |||||
| auto_questions: Annotated[int, Field(default=0, ge=0, le=10)] | |||||
| chunk_token_num: Annotated[int, Field(default=512, ge=1, le=2048)] | |||||
| delimiter: Annotated[str, Field(default=r"\n", min_length=1)] | |||||
| graphrag: Annotated[GraphragConfig, Field(default_factory=lambda: GraphragConfig(use_graphrag=False))] | |||||
| html4excel: Annotated[bool, Field(default=False)] | |||||
| layout_recognize: Annotated[str, Field(default="DeepDOC")] | |||||
| raptor: Annotated[RaptorConfig, Field(default_factory=lambda: RaptorConfig(use_raptor=False))] | |||||
| tag_kb_ids: Annotated[list[str], Field(default_factory=list)] | |||||
| topn_tags: Annotated[int, Field(default=1, ge=1, le=10)] | |||||
| filename_embd_weight: Annotated[float | None, Field(default=0.1, ge=0.0, le=1.0)] | |||||
| task_page_size: Annotated[int | None, Field(default=None, ge=1)] | |||||
| pages: Annotated[list[list[int]] | None, Field(default=None)] | |||||
| class CreateDatasetReq(Base): | class CreateDatasetReq(Base): | ||||
| name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(...)] | name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(...)] | ||||
| avatar: str | None = Field(default=None, max_length=65535) | |||||
| description: str | None = Field(default=None, max_length=65535) | |||||
| embedding_model: str | None = Field(default=None, max_length=255, serialization_alias="embd_id") | |||||
| permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16) | |||||
| chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id") | |||||
| parser_config: ParserConfig | None = Field(default=None) | |||||
| @field_validator("avatar") | |||||
| avatar: Annotated[str | None, Field(default=None, max_length=65535)] | |||||
| description: Annotated[str | None, Field(default=None, max_length=65535)] | |||||
| embedding_model: Annotated[str | None, Field(default=None, max_length=255, serialization_alias="embd_id")] | |||||
| permission: Annotated[Literal["me", "team"], Field(default="me", min_length=1, max_length=16)] | |||||
| chunk_method: Annotated[ | |||||
| Literal["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"], | |||||
| Field(default="naive", min_length=1, max_length=32, serialization_alias="parser_id"), | |||||
| ] | |||||
| parser_config: Annotated[ParserConfig | None, Field(default=None)] | |||||
| @field_validator("avatar", mode="after") | |||||
| @classmethod | @classmethod | ||||
| def validate_avatar_base64(cls, v: str | None) -> str | None: | def validate_avatar_base64(cls, v: str | None) -> str | None: | ||||
| """ | """ | ||||
| @field_validator("embedding_model", mode="before") | @field_validator("embedding_model", mode="before") | ||||
| @classmethod | @classmethod | ||||
| def normalize_embedding_model(cls, v: Any) -> Any: | def normalize_embedding_model(cls, v: Any) -> Any: | ||||
| """Normalize embedding model string by stripping whitespace""" | |||||
| if isinstance(v, str): | if isinstance(v, str): | ||||
| return v.strip() | return v.strip() | ||||
| return v | return v | ||||
| raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings") | raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings") | ||||
| return v | return v | ||||
| @field_validator("permission", mode="before") | |||||
| @classmethod | |||||
| def normalize_permission(cls, v: Any) -> Any: | |||||
| return normalize_str(v) | |||||
| # @field_validator("permission", mode="before") | |||||
| # @classmethod | |||||
| # def normalize_permission(cls, v: Any) -> Any: | |||||
| # return normalize_str(v) | |||||
| @field_validator("parser_config", mode="before") | @field_validator("parser_config", mode="before") | ||||
| @classmethod | @classmethod | ||||
| class UpdateDatasetReq(CreateDatasetReq): | class UpdateDatasetReq(CreateDatasetReq): | ||||
| dataset_id: str = Field(...) | |||||
| dataset_id: Annotated[str, Field(...)] | |||||
| name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(default="")] | name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(default="")] | ||||
| pagerank: int = Field(default=0, ge=0, le=100) | |||||
| pagerank: Annotated[int, Field(default=0, ge=0, le=100)] | |||||
| @field_validator("dataset_id", mode="before") | @field_validator("dataset_id", mode="before") | ||||
| @classmethod | @classmethod | ||||
| class DeleteReq(Base): | class DeleteReq(Base): | ||||
| ids: list[str] | None = Field(...) | |||||
| ids: Annotated[list[str] | None, Field(...)] | |||||
| @field_validator("ids", mode="after") | @field_validator("ids", mode="after") | ||||
| @classmethod | @classmethod | ||||
| class DeleteDatasetReq(DeleteReq): ... | class DeleteDatasetReq(DeleteReq): ... | ||||
| class OrderByEnum(StrEnum): | |||||
| create_time = auto() | |||||
| update_time = auto() | |||||
| class BaseListReq(BaseModel): | |||||
| model_config = ConfigDict(extra="forbid") | |||||
| class BaseListReq(Base): | |||||
| id: str | None = None | |||||
| name: str | None = None | |||||
| page: int = Field(default=1, ge=1) | |||||
| page_size: int = Field(default=30, ge=1) | |||||
| orderby: OrderByEnum = Field(default=OrderByEnum.create_time) | |||||
| desc: bool = Field(default=True) | |||||
| id: Annotated[str | None, Field(default=None)] | |||||
| name: Annotated[str | None, Field(default=None)] | |||||
| page: Annotated[int, Field(default=1, ge=1)] | |||||
| page_size: Annotated[int, Field(default=30, ge=1)] | |||||
| orderby: Annotated[Literal["create_time", "update_time"], Field(default="create_time")] | |||||
| desc: Annotated[bool, Field(default=True)] | |||||
| @field_validator("id", mode="before") | @field_validator("id", mode="before") | ||||
| @classmethod | @classmethod | ||||
| def validate_id(cls, v: Any) -> str: | def validate_id(cls, v: Any) -> str: | ||||
| return validate_uuid1_hex(v) | return validate_uuid1_hex(v) | ||||
| @field_validator("orderby", mode="before") | |||||
| @classmethod | |||||
| def normalize_orderby(cls, v: Any) -> Any: | |||||
| return normalize_str(v) | |||||
| class ListDatasetReq(BaseListReq): ... | class ListDatasetReq(BaseListReq): ... |
| [ | [ | ||||
| ("me", "me"), | ("me", "me"), | ||||
| ("team", "team"), | ("team", "team"), | ||||
| ("me_upercase", "ME"), | |||||
| ("team_upercase", "TEAM"), | |||||
| ("whitespace", " ME "), | |||||
| ], | ], | ||||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||||
| ids=["me", "team"], | |||||
| ) | ) | ||||
| def test_permission(self, HttpApiAuth, name, permission): | def test_permission(self, HttpApiAuth, name, permission): | ||||
| payload = {"name": name, "permission": permission} | payload = {"name": name, "permission": permission} | ||||
| ("empty", ""), | ("empty", ""), | ||||
| ("unknown", "unknown"), | ("unknown", "unknown"), | ||||
| ("type_error", list()), | ("type_error", list()), | ||||
| ("me_upercase", "ME"), | |||||
| ("team_upercase", "TEAM"), | |||||
| ("whitespace", " ME "), | |||||
| ], | ], | ||||
| ids=["empty", "unknown", "type_error"], | |||||
| ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"], | |||||
| ) | ) | ||||
| def test_permission_invalid(self, HttpApiAuth, name, permission): | def test_permission_invalid(self, HttpApiAuth, name, permission): | ||||
| payload = {"name": name, "permission": permission} | payload = {"name": name, "permission": permission} | ||||
| [ | [ | ||||
| ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ||||
| ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"), | ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"), | ||||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer"), | |||||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer"), | |||||
| ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"), | ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"), | ||||
| ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"), | ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"), | ||||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer"), | |||||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer"), | |||||
| ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ||||
| ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ||||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer"), | |||||
| ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"), | ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"), | ||||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean"), | |||||
| ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ||||
| ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ||||
| ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"), | ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"), | ||||
| ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"), | ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"), | ||||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer"), | |||||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer"), | |||||
| ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ||||
| ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ||||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number"), | |||||
| ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), | ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), | ||||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer"), | |||||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer"), | |||||
| ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), | ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), | ||||
| ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"), | ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"), | ||||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||||
| ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ||||
| ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ||||
| ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ||||
| ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ||||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||||
| ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ||||
| ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ||||
| ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ||||
| ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ||||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||||
| ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ||||
| ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ||||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||||
| ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ||||
| ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ||||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||||
| ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ||||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||||
| ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ||||
| ], | ], | ||||
| ids=[ | ids=[ |
| [ | [ | ||||
| ({"orderby": "create_time"}, lambda r: (is_sorted(r["data"], "create_time", True))), | ({"orderby": "create_time"}, lambda r: (is_sorted(r["data"], "create_time", True))), | ||||
| ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))), | ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))), | ||||
| ({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))), | |||||
| ({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||||
| ({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||||
| ], | ], | ||||
| ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||||
| ids=["orderby_create_time", "orderby_update_time"], | |||||
| ) | ) | ||||
| def test_orderby(self, HttpApiAuth, params, assertions): | def test_orderby(self, HttpApiAuth, params, assertions): | ||||
| res = list_datasets(HttpApiAuth, params) | res = list_datasets(HttpApiAuth, params) | ||||
| [ | [ | ||||
| {"orderby": ""}, | {"orderby": ""}, | ||||
| {"orderby": "unknown"}, | {"orderby": "unknown"}, | ||||
| ({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))), | |||||
| ({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||||
| ({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))), | |||||
| ], | ], | ||||
| ids=["empty", "unknown"], | |||||
| ids=["empty", "unknown", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||||
| ) | ) | ||||
| def test_orderby_invalid(self, HttpApiAuth, params): | def test_orderby_invalid(self, HttpApiAuth, params): | ||||
| res = list_datasets(HttpApiAuth, params) | res = list_datasets(HttpApiAuth, params) |
| [ | [ | ||||
| "me", | "me", | ||||
| "team", | "team", | ||||
| "ME", | |||||
| "TEAM", | |||||
| " ME ", | |||||
| ], | ], | ||||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||||
| ids=["me", "team"], | |||||
| ) | ) | ||||
| def test_permission(self, HttpApiAuth, add_dataset_func, permission): | def test_permission(self, HttpApiAuth, add_dataset_func, permission): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| "", | "", | ||||
| "unknown", | "unknown", | ||||
| list(), | list(), | ||||
| "ME", | |||||
| "TEAM", | |||||
| " ME ", | |||||
| ], | ], | ||||
| ids=["empty", "unknown", "type_error"], | |||||
| ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"], | |||||
| ) | ) | ||||
| def test_permission_invalid(self, HttpApiAuth, add_dataset_func, permission): | def test_permission_invalid(self, HttpApiAuth, add_dataset_func, permission): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| [ | [ | ||||
| ({"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ({"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ||||
| ({"auto_keywords": 33}, "Input should be less than or equal to 32"), | ({"auto_keywords": 33}, "Input should be less than or equal to 32"), | ||||
| ({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"auto_keywords": 3.14}, "Input should be a valid integer"), | |||||
| ({"auto_keywords": "string"}, "Input should be a valid integer"), | |||||
| ({"auto_questions": -1}, "Input should be greater than or equal to 0"), | ({"auto_questions": -1}, "Input should be greater than or equal to 0"), | ||||
| ({"auto_questions": 11}, "Input should be less than or equal to 10"), | ({"auto_questions": 11}, "Input should be less than or equal to 10"), | ||||
| ({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"auto_questions": 3.14}, "Input should be a valid integer"), | |||||
| ({"auto_questions": "string"}, "Input should be a valid integer"), | |||||
| ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ||||
| ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ||||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||||
| ({"chunk_token_num": "string"}, "Input should be a valid integer"), | |||||
| ({"delimiter": ""}, "String should have at least 1 character"), | ({"delimiter": ""}, "String should have at least 1 character"), | ||||
| ({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"html4excel": "string"}, "Input should be a valid boolean"), | |||||
| ({"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ({"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ||||
| ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ||||
| ({"topn_tags": 0}, "Input should be greater than or equal to 1"), | ({"topn_tags": 0}, "Input should be greater than or equal to 1"), | ||||
| ({"topn_tags": 11}, "Input should be less than or equal to 10"), | ({"topn_tags": 11}, "Input should be less than or equal to 10"), | ||||
| ({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"topn_tags": 3.14}, "Input should be a valid integer"), | |||||
| ({"topn_tags": "string"}, "Input should be a valid integer"), | |||||
| ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ||||
| ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ||||
| ({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ({"filename_embd_weight": "string"}, "Input should be a valid number"), | |||||
| ({"task_page_size": 0}, "Input should be greater than or equal to 1"), | ({"task_page_size": 0}, "Input should be greater than or equal to 1"), | ||||
| ({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"task_page_size": 3.14}, "Input should be a valid integer"), | |||||
| ({"task_page_size": "string"}, "Input should be a valid integer"), | |||||
| ({"pages": "1,2"}, "Input should be a valid list"), | ({"pages": "1,2"}, "Input should be a valid list"), | ||||
| ({"pages": ["1,2"]}, "Input should be a valid list"), | ({"pages": ["1,2"]}, "Input should be a valid list"), | ||||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||||
| ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ||||
| ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ||||
| ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ||||
| ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ||||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||||
| ({"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ({"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ||||
| ({"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ({"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ||||
| ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ||||
| ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ||||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ||||
| ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ||||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||||
| ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ||||
| ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ||||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ||||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||||
| ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ||||
| ], | ], | ||||
| ids=[ | ids=[ |
| [ | [ | ||||
| ("me", "me"), | ("me", "me"), | ||||
| ("team", "team"), | ("team", "team"), | ||||
| ("me_upercase", "ME"), | |||||
| ("team_upercase", "TEAM"), | |||||
| ("whitespace", " ME "), | |||||
| ], | ], | ||||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||||
| ids=["me", "team"], | |||||
| ) | ) | ||||
| def test_permission(self, client, name, permission): | def test_permission(self, client, name, permission): | ||||
| payload = {"name": name, "permission": permission} | payload = {"name": name, "permission": permission} | ||||
| [ | [ | ||||
| ("empty", ""), | ("empty", ""), | ||||
| ("unknown", "unknown"), | ("unknown", "unknown"), | ||||
| ("me_upercase", "ME"), | |||||
| ("team_upercase", "TEAM"), | |||||
| ("whitespace", " ME "), | |||||
| ], | ], | ||||
| ids=["empty", "unknown"], | |||||
| ids=["empty", "unknown", "me_upercase", "team_upercase", "whitespace"], | |||||
| ) | ) | ||||
| def test_permission_invalid(self, client, name, permission): | def test_permission_invalid(self, client, name, permission): | ||||
| payload = {"name": name, "permission": permission} | payload = {"name": name, "permission": permission} | ||||
| [ | [ | ||||
| ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ||||
| ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"), | ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"), | ||||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer"), | |||||
| ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer"), | |||||
| ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"), | ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"), | ||||
| ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"), | ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"), | ||||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer"), | |||||
| ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer"), | |||||
| ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ||||
| ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ||||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||||
| ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer"), | |||||
| ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"), | ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"), | ||||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean"), | |||||
| ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ||||
| ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ||||
| ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"), | ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"), | ||||
| ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"), | ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"), | ||||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer"), | |||||
| ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer"), | |||||
| ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ||||
| ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ||||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number"), | |||||
| ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), | ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), | ||||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer"), | |||||
| ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer"), | |||||
| ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), | ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), | ||||
| ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"), | ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"), | ||||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||||
| ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||||
| ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ||||
| ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ||||
| ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ||||
| ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ||||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||||
| ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||||
| ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||||
| ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ||||
| ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ||||
| ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ||||
| ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ||||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||||
| ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||||
| ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ||||
| ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ||||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||||
| ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ||||
| ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ||||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||||
| ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||||
| ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ||||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||||
| ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||||
| ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ||||
| ], | ], | ||||
| ids=[ | ids=[ |
| [ | [ | ||||
| {"orderby": "create_time"}, | {"orderby": "create_time"}, | ||||
| {"orderby": "update_time"}, | {"orderby": "update_time"}, | ||||
| {"orderby": "CREATE_TIME"}, | |||||
| {"orderby": "UPDATE_TIME"}, | |||||
| {"orderby": " create_time "}, | |||||
| ], | ], | ||||
| ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||||
| ids=["orderby_create_time", "orderby_update_time"], | |||||
| ) | ) | ||||
| def test_orderby(self, client, params): | def test_orderby(self, client, params): | ||||
| client.list_datasets(**params) | client.list_datasets(**params) | ||||
| [ | [ | ||||
| {"orderby": ""}, | {"orderby": ""}, | ||||
| {"orderby": "unknown"}, | {"orderby": "unknown"}, | ||||
| {"orderby": "CREATE_TIME"}, | |||||
| {"orderby": "UPDATE_TIME"}, | |||||
| {"orderby": " create_time "}, | |||||
| ], | ], | ||||
| ids=["empty", "unknown"], | |||||
| ids=["empty", "unknown", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"], | |||||
| ) | ) | ||||
| def test_orderby_invalid(self, client, params): | def test_orderby_invalid(self, client, params): | ||||
| with pytest.raises(Exception) as excinfo: | with pytest.raises(Exception) as excinfo: |
| [ | [ | ||||
| "me", | "me", | ||||
| "team", | "team", | ||||
| "ME", | |||||
| "TEAM", | |||||
| " ME ", | |||||
| ], | ], | ||||
| ids=["me", "team", "me_upercase", "team_upercase", "whitespace"], | |||||
| ids=["me", "team"], | |||||
| ) | ) | ||||
| def test_permission(self, client, add_dataset_func, permission): | def test_permission(self, client, add_dataset_func, permission): | ||||
| dataset = add_dataset_func | dataset = add_dataset_func | ||||
| "", | "", | ||||
| "unknown", | "unknown", | ||||
| list(), | list(), | ||||
| "ME", | |||||
| "TEAM", | |||||
| " ME ", | |||||
| ], | ], | ||||
| ids=["empty", "unknown", "type_error"], | |||||
| ids=["empty", "unknown", "type_error", "me_upercase", "team_upercase", "whitespace"], | |||||
| ) | ) | ||||
| def test_permission_invalid(self, add_dataset_func, permission): | def test_permission_invalid(self, add_dataset_func, permission): | ||||
| dataset = add_dataset_func | dataset = add_dataset_func | ||||
| [ | [ | ||||
| ({"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ({"auto_keywords": -1}, "Input should be greater than or equal to 0"), | ||||
| ({"auto_keywords": 33}, "Input should be less than or equal to 32"), | ({"auto_keywords": 33}, "Input should be less than or equal to 32"), | ||||
| ({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"auto_keywords": 3.14}, "Input should be a valid integer"), | |||||
| ({"auto_keywords": "string"}, "Input should be a valid integer"), | |||||
| ({"auto_questions": -1}, "Input should be greater than or equal to 0"), | ({"auto_questions": -1}, "Input should be greater than or equal to 0"), | ||||
| ({"auto_questions": 11}, "Input should be less than or equal to 10"), | ({"auto_questions": 11}, "Input should be less than or equal to 10"), | ||||
| ({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"auto_questions": 3.14}, "Input should be a valid integer"), | |||||
| ({"auto_questions": "string"}, "Input should be a valid integer"), | |||||
| ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"), | ||||
| ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"), | ||||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"chunk_token_num": 3.14}, "Input should be a valid integer"), | |||||
| ({"chunk_token_num": "string"}, "Input should be a valid integer"), | |||||
| ({"delimiter": ""}, "String should have at least 1 character"), | ({"delimiter": ""}, "String should have at least 1 character"), | ||||
| ({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"html4excel": "string"}, "Input should be a valid boolean"), | |||||
| ({"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ({"tag_kb_ids": "1,2"}, "Input should be a valid list"), | ||||
| ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"), | ||||
| ({"topn_tags": 0}, "Input should be greater than or equal to 1"), | ({"topn_tags": 0}, "Input should be greater than or equal to 1"), | ||||
| ({"topn_tags": 11}, "Input should be less than or equal to 10"), | ({"topn_tags": 11}, "Input should be less than or equal to 10"), | ||||
| ({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"topn_tags": 3.14}, "Input should be a valid integer"), | |||||
| ({"topn_tags": "string"}, "Input should be a valid integer"), | |||||
| ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"), | ||||
| ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), | ||||
| ({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ({"filename_embd_weight": "string"}, "Input should be a valid number"), | |||||
| ({"task_page_size": 0}, "Input should be greater than or equal to 1"), | ({"task_page_size": 0}, "Input should be greater than or equal to 1"), | ||||
| ({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"task_page_size": 3.14}, "Input should be a valid integer"), | |||||
| ({"task_page_size": "string"}, "Input should be a valid integer"), | |||||
| ({"pages": "1,2"}, "Input should be a valid list"), | ({"pages": "1,2"}, "Input should be a valid list"), | ||||
| ({"pages": ["1,2"]}, "Input should be a valid list"), | ({"pages": ["1,2"]}, "Input should be a valid list"), | ||||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"pages": [["string1", "string2"]]}, "Input should be a valid integer"), | |||||
| ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean"), | |||||
| ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"), | ||||
| ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ({"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"), | ||||
| ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"), | ||||
| ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"), | ||||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"), | |||||
| ({"graphrag": {"community": "string"}}, "Input should be a valid boolean"), | |||||
| ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean"), | |||||
| ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean"), | |||||
| ({"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ({"raptor": {"prompt": ""}}, "String should have at least 1 character"), | ||||
| ({"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ({"raptor": {"prompt": " "}}, "String should have at least 1 character"), | ||||
| ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"), | ||||
| ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"), | ||||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"max_token": "string"}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"), | ||||
| ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"), | ||||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"), | |||||
| ({"raptor": {"threshold": "string"}}, "Input should be a valid number"), | |||||
| ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"), | ||||
| ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"), | ||||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), | |||||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), | ||||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), | |||||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), | |||||
| ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer"), | |||||
| ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer"), | |||||
| ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"), | ||||
| ], | ], | ||||
| ids=[ | ids=[ |