### What problem does this PR solve?

Fix the HTTP API create/update dataset `parser_config` default value error.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

tags/v0.19.0
| if req.get("parser_config"): | if req.get("parser_config"): | ||||
| req["parser_config"] = deep_merge(kb.parser_config, req["parser_config"]) | req["parser_config"] = deep_merge(kb.parser_config, req["parser_config"]) | ||||
| if (chunk_method := req.get("parser_id")) and chunk_method != kb.parser_id and req.get("parser_config") is None: | |||||
| req["parser_config"] = get_parser_config(chunk_method, None) | |||||
| if (chunk_method := req.get("parser_id")) and chunk_method != kb.parser_id: | |||||
| if not req.get("parser_config"): | |||||
| req["parser_config"] = get_parser_config(chunk_method, None) | |||||
| elif "parser_config" in req and not req["parser_config"]: | |||||
| del req["parser_config"] | |||||
| if "name" in req and req["name"].lower() != kb.name.lower(): | if "name" in req and req["name"].lower() != kb.name.lower(): | ||||
| try: | try: |
| permission: Annotated[PermissionEnum, StringConstraints(strip_whitespace=True, min_length=1, max_length=16), Field(default=PermissionEnum.me)] | permission: Annotated[PermissionEnum, StringConstraints(strip_whitespace=True, min_length=1, max_length=16), Field(default=PermissionEnum.me)] | ||||
| chunk_method: Annotated[ChunkMethodnEnum, StringConstraints(strip_whitespace=True, min_length=1, max_length=32), Field(default=ChunkMethodnEnum.naive, serialization_alias="parser_id")] | chunk_method: Annotated[ChunkMethodnEnum, StringConstraints(strip_whitespace=True, min_length=1, max_length=32), Field(default=ChunkMethodnEnum.naive, serialization_alias="parser_id")] | ||||
| pagerank: int = Field(default=0, ge=0, le=100) | pagerank: int = Field(default=0, ge=0, le=100) | ||||
| parser_config: ParserConfig = Field(default_factory=dict) | |||||
| parser_config: ParserConfig | None = Field(default=None) | |||||
| @field_validator("avatar") | @field_validator("avatar") | ||||
| @classmethod | @classmethod | ||||
| """ | """ | ||||
| return v.lower() if isinstance(v, str) else v | return v.lower() if isinstance(v, str) else v | ||||
| @field_validator("parser_config", mode="before") | |||||
| @classmethod | |||||
| def normalize_empty_parser_config(cls, v: Any) -> Any: | |||||
| """ | |||||
| Normalizes empty parser configuration by converting empty dictionaries to None. | |||||
| This validator ensures consistent handling of empty parser configurations across | |||||
| the application by converting empty dicts to None values. | |||||
| Args: | |||||
| v (Any): Raw input value for the parser config field | |||||
| Returns: | |||||
| Any: Returns None if input is an empty dict, otherwise returns the original value | |||||
| Example: | |||||
| >>> normalize_empty_parser_config({}) | |||||
| None | |||||
| >>> normalize_empty_parser_config({"key": "value"}) | |||||
| {"key": "value"} | |||||
| """ | |||||
| if v == {}: | |||||
| return None | |||||
| return v | |||||
| @field_validator("parser_config", mode="after") | @field_validator("parser_config", mode="after") | ||||
| @classmethod | @classmethod | ||||
| def validate_parser_config_json_length(cls, v: ParserConfig) -> ParserConfig: | |||||
| def validate_parser_config_json_length(cls, v: ParserConfig | None) -> ParserConfig | None: | |||||
| """ | """ | ||||
| Validates serialized JSON length constraints for parser configuration. | Validates serialized JSON length constraints for parser configuration. | ||||
| Implements a two-stage validation workflow: | Implements a two-stage validation workflow: | ||||
| 1. Model serialization - convert Pydantic model to JSON string | |||||
| 2. Size verification - enforce maximum allowed payload size | |||||
| 1. Null check - bypass validation for empty configurations | |||||
| 2. Model serialization - convert Pydantic model to JSON string | |||||
| 3. Size verification - enforce maximum allowed payload size | |||||
| Args: | Args: | ||||
| v (ParserConfig | None): Raw parser configuration object | v (ParserConfig | None): Raw parser configuration object | ||||
| Raises: | Raises: | ||||
| ValueError: When serialized JSON exceeds 65,535 characters | ValueError: When serialized JSON exceeds 65,535 characters | ||||
| """ | """ | ||||
| if v is None: | |||||
| return None | |||||
| if (json_str := v.model_dump_json()) and len(json_str) > 65535: | if (json_str := v.model_dump_json()) and len(json_str) > 65535: | ||||
| raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}") | raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}") | ||||
| return v | return v |
| def test_avatar(self, get_http_api_auth, tmp_path): | def test_avatar(self, get_http_api_auth, tmp_path): | ||||
| fn = create_image_file(tmp_path / "ragflow_test.png") | fn = create_image_file(tmp_path / "ragflow_test.png") | ||||
| payload = { | payload = { | ||||
| "name": "avatar_test", | |||||
| "name": "avatar", | |||||
| "avatar": f"data:image/png;base64,{encode_avatar(fn)}", | "avatar": f"data:image/png;base64,{encode_avatar(fn)}", | ||||
| } | } | ||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_avatar_exceeds_limit_length(self, get_http_api_auth): | def test_avatar_exceeds_limit_length(self, get_http_api_auth): | ||||
| payload = {"name": "exceeds_limit_length_avatar", "avatar": "a" * 65536} | |||||
| payload = {"name": "avatar_exceeds_limit_length", "avatar": "a" * 65536} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "String should have at most 65535 characters" in res["message"], res | assert "String should have at most 65535 characters" in res["message"], res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "name, avatar_prefix, expected_message", | |||||
| "name, prefix, expected_message", | |||||
| [ | [ | ||||
| ("empty_prefix", "", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | ("empty_prefix", "", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | ||||
| ("missing_comma", "data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | ("missing_comma", "data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | ||||
| ], | ], | ||||
| ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"], | ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"], | ||||
| ) | ) | ||||
| def test_avatar_invalid_prefix(self, get_http_api_auth, tmp_path, name, avatar_prefix, expected_message): | |||||
| def test_avatar_invalid_prefix(self, get_http_api_auth, tmp_path, name, prefix, expected_message): | |||||
| fn = create_image_file(tmp_path / "ragflow_test.png") | fn = create_image_file(tmp_path / "ragflow_test.png") | ||||
| payload = { | payload = { | ||||
| "name": name, | "name": name, | ||||
| "avatar": f"{avatar_prefix}{encode_avatar(fn)}", | |||||
| "avatar": f"{prefix}{encode_avatar(fn)}", | |||||
| } | } | ||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_avatar_unset(self, get_http_api_auth): | def test_avatar_unset(self, get_http_api_auth): | ||||
| payload = {"name": "test_avatar_unset"} | |||||
| payload = {"name": "avatar_unset"} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["avatar"] is None, res | assert res["data"]["avatar"] is None, res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_avatar_none(self, get_http_api_auth): | def test_avatar_none(self, get_http_api_auth): | ||||
| payload = {"name": "test_avatar_none", "avatar": None} | |||||
| payload = {"name": "avatar_none", "avatar": None} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["avatar"] is None, res | assert res["data"]["avatar"] is None, res | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_description(self, get_http_api_auth): | def test_description(self, get_http_api_auth): | ||||
| payload = {"name": "test_description", "description": "description"} | |||||
| payload = {"name": "description", "description": "description"} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["description"] == "description", res | assert res["data"]["description"] == "description", res | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_description_exceeds_limit_length(self, get_http_api_auth): | def test_description_exceeds_limit_length(self, get_http_api_auth): | ||||
| payload = {"name": "exceeds_limit_length_description", "description": "a" * 65536} | |||||
| payload = {"name": "description_exceeds_limit_length", "description": "a" * 65536} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "String should have at most 65535 characters" in res["message"], res | assert "String should have at most 65535 characters" in res["message"], res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_description_unset(self, get_http_api_auth): | def test_description_unset(self, get_http_api_auth): | ||||
| payload = {"name": "test_description_unset"} | |||||
| payload = {"name": "description_unset"} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["description"] is None, res | assert res["data"]["description"] is None, res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_description_none(self, get_http_api_auth): | def test_description_none(self, get_http_api_auth): | ||||
| payload = {"name": "test_description_none", "description": None} | |||||
| payload = {"name": "description_none", "description": None} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["description"] is None, res | assert res["data"]["description"] is None, res | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_embedding_model_none(self, get_http_api_auth): | def test_embedding_model_none(self, get_http_api_auth): | ||||
| payload = {"name": "test_embedding_model_none", "embedding_model": None} | |||||
| payload = {"name": "embedding_model_none", "embedding_model": None} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "Input should be a valid string" in res["message"], res | assert "Input should be a valid string" in res["message"], res | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_permission_unset(self, get_http_api_auth): | def test_permission_unset(self, get_http_api_auth): | ||||
| payload = {"name": "test_permission_unset"} | |||||
| payload = {"name": "permission_unset"} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["permission"] == "me", res | assert res["data"]["permission"] == "me", res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_permission_none(self, get_http_api_auth): | def test_permission_none(self, get_http_api_auth): | ||||
| payload = {"name": "test_permission_none", "permission": None} | |||||
| payload = {"name": "permission_none", "permission": None} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "Input should be 'me' or 'team'" in res["message"], res | assert "Input should be 'me' or 'team'" in res["message"], res | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_chunk_method_unset(self, get_http_api_auth): | def test_chunk_method_unset(self, get_http_api_auth): | ||||
| payload = {"name": "test_chunk_method_unset"} | |||||
| payload = {"name": "chunk_method_unset"} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["chunk_method"] == "naive", res | assert res["data"]["chunk_method"] == "naive", res | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_parser_config_empty(self, get_http_api_auth): | def test_parser_config_empty(self, get_http_api_auth): | ||||
| payload = {"name": "default_empty", "parser_config": {}} | |||||
| payload = {"name": "parser_config_empty", "parser_config": {}} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["parser_config"] == { | assert res["data"]["parser_config"] == { | ||||
| "auto_keywords": 0, | |||||
| "auto_questions": 0, | |||||
| "chunk_token_num": 128, | "chunk_token_num": 128, | ||||
| "delimiter": r"\n", | "delimiter": r"\n", | ||||
| "filename_embd_weight": None, | |||||
| "graphrag": None, | |||||
| "html4excel": False, | "html4excel": False, | ||||
| "layout_recognize": "DeepDOC", | "layout_recognize": "DeepDOC", | ||||
| "pages": None, | |||||
| "raptor": None, | |||||
| "tag_kb_ids": [], | |||||
| "task_page_size": None, | |||||
| "topn_tags": 1, | |||||
| } | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, res | |||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_parser_config_unset(self, get_http_api_auth): | def test_parser_config_unset(self, get_http_api_auth): | ||||
| payload = {"name": "default_unset"} | |||||
| payload = {"name": "parser_config_unset"} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"]["parser_config"] == { | assert res["data"]["parser_config"] == { | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_parser_config_none(self, get_http_api_auth): | def test_parser_config_none(self, get_http_api_auth): | ||||
| payload = {"name": "default_none", "parser_config": None} | |||||
| payload = {"name": "parser_config_none", "parser_config": None} | |||||
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 101, res | |||||
| assert "Input should be a valid dictionary or instance of ParserConfig" in res["message"], res | |||||
| assert res["code"] == 0, res | |||||
| assert res["data"]["parser_config"] == { | |||||
| "chunk_token_num": 128, | |||||
| "delimiter": "\\n", | |||||
| "html4excel": False, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, res | |||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( |
| class TestDatasetUpdate: | class TestDatasetUpdate: | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_dataset_id_not_uuid(self, get_http_api_auth): | def test_dataset_id_not_uuid(self, get_http_api_auth): | ||||
| payload = {"name": "dataset_id_not_uuid"} | |||||
| payload = {"name": "not_uuid"} | |||||
| res = update_dataset(get_http_api_auth, "not_uuid", payload) | res = update_dataset(get_http_api_auth, "not_uuid", payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "Input should be a valid UUID" in res["message"], res | assert "Input should be a valid UUID" in res["message"], res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "name, avatar_prefix, expected_message", | |||||
| "avatar_prefix, expected_message", | |||||
| [ | [ | ||||
| ("empty_prefix", "", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | |||||
| ("missing_comma", "data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | |||||
| ("unsupported_mine_type", "invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"), | |||||
| ("invalid_mine_type", "data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"), | |||||
| ("", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | |||||
| ("data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"), | |||||
| ("invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"), | |||||
| ("data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"), | |||||
| ], | ], | ||||
| ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"], | ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"], | ||||
| ) | ) | ||||
| def test_avatar_invalid_prefix(self, get_http_api_auth, add_dataset_func, tmp_path, name, avatar_prefix, expected_message): | |||||
| def test_avatar_invalid_prefix(self, get_http_api_auth, add_dataset_func, tmp_path, avatar_prefix, expected_message): | |||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| fn = create_image_file(tmp_path / "ragflow_test.png") | fn = create_image_file(tmp_path / "ragflow_test.png") | ||||
| payload = { | |||||
| "name": name, | |||||
| "avatar": f"{avatar_prefix}{encode_avatar(fn)}", | |||||
| } | |||||
| payload = {"avatar": f"{avatar_prefix}{encode_avatar(fn)}"} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | res = update_dataset(get_http_api_auth, dataset_id, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert expected_message in res["message"], res | assert expected_message in res["message"], res | ||||
| @pytest.mark.p1 | @pytest.mark.p1 | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "name, permission", | |||||
| "permission", | |||||
| [ | [ | ||||
| ("me", "me"), | |||||
| ("team", "team"), | |||||
| ("me_upercase", "ME"), | |||||
| ("team_upercase", "TEAM"), | |||||
| "me", | |||||
| "team", | |||||
| "ME", | |||||
| "TEAM", | |||||
| ], | ], | ||||
| ids=["me", "team", "me_upercase", "team_upercase"], | ids=["me", "team", "me_upercase", "team_upercase"], | ||||
| ) | ) | ||||
| def test_permission(self, get_http_api_auth, add_dataset_func, name, permission): | |||||
| def test_permission(self, get_http_api_auth, add_dataset_func, permission): | |||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| payload = {"name": name, "permission": permission} | |||||
| payload = {"permission": permission} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | res = update_dataset(get_http_api_auth, dataset_id, payload) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_permission_none(self, get_http_api_auth, add_dataset_func): | def test_permission_none(self, get_http_api_auth, add_dataset_func): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| payload = {"name": "test_permission_none", "permission": None} | |||||
| payload = {"permission": None} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | res = update_dataset(get_http_api_auth, dataset_id, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "Input should be 'me' or 'team'" in res["message"], res | assert "Input should be 'me' or 'team'" in res["message"], res | ||||
| res = list_datasets(get_http_api_auth) | res = list_datasets(get_http_api_auth) | ||||
| assert res["code"] == 0, res | assert res["code"] == 0, res | ||||
| assert res["data"][0]["parser_config"] == {} | |||||
| # @pytest.mark.p2 | |||||
| # def test_parser_config_unset(self, get_http_api_auth, add_dataset_func): | |||||
| # dataset_id = add_dataset_func | |||||
| # payload = {"name": "default_unset"} | |||||
| # res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| # assert res["code"] == 0, res | |||||
| # res = list_datasets(get_http_api_auth) | |||||
| # assert res["code"] == 0, res | |||||
| # assert res["data"][0]["parser_config"] == { | |||||
| # "chunk_token_num": 128, | |||||
| # "delimiter": r"\n", | |||||
| # "html4excel": False, | |||||
| # "layout_recognize": "DeepDOC", | |||||
| # "raptor": {"use_raptor": False}, | |||||
| # }, res | |||||
| assert res["data"][0]["parser_config"] == { | |||||
| "chunk_token_num": 128, | |||||
| "delimiter": r"\n", | |||||
| "html4excel": False, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, res | |||||
| @pytest.mark.p3 | @pytest.mark.p3 | ||||
| def test_parser_config_none(self, get_http_api_auth, add_dataset_func): | def test_parser_config_none(self, get_http_api_auth, add_dataset_func): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| payload = {"parser_config": None} | payload = {"parser_config": None} | ||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | res = update_dataset(get_http_api_auth, dataset_id, payload) | ||||
| assert res["code"] == 101, res | |||||
| assert "Input should be a valid dictionary or instance of ParserConfig" in res["message"], res | |||||
| assert res["code"] == 0, res | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["code"] == 0, res | |||||
| assert res["data"][0]["parser_config"] == { | |||||
| "chunk_token_num": 128, | |||||
| "delimiter": r"\n", | |||||
| "html4excel": False, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, res | |||||
| @pytest.mark.p3 | |||||
| def test_parser_config_empty_with_chunk_method_change(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"chunk_method": "qa", "parser_config": {}} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0, res | |||||
| res = list_datasets(get_http_api_auth) | |||||
| print(res) | |||||
| assert res["code"] == 0, res | |||||
| assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res | |||||
| @pytest.mark.p3 | |||||
| def test_parser_config_unset_with_chunk_method_change(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"chunk_method": "qa"} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0, res | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert res["code"] == 0, res | |||||
| assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res | |||||
| @pytest.mark.p3 | |||||
| def test_parser_config_none_with_chunk_method_change(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"chunk_method": "qa", "parser_config": None} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0, res | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| print(res) | |||||
| assert res["code"] == 0, res | |||||
| assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res | |||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| {"unknown_field": "unknown_field"}, | {"unknown_field": "unknown_field"}, | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_unsupported_field(self, get_http_api_auth, add_dataset_func, payload): | |||||
| def test_field_unsupported(self, get_http_api_auth, add_dataset_func, payload): | |||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | res = update_dataset(get_http_api_auth, dataset_id, payload) | ||||
| assert res["code"] == 101, res | assert res["code"] == 101, res | ||||
| assert "Extra inputs are not permitted" in res["message"], res | assert "Extra inputs are not permitted" in res["message"], res | ||||
| @pytest.mark.p2 | |||||
| def test_field_unset(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert res["code"] == 0, res | |||||
| original_data = res["data"][0] | |||||
| payload = {"name": "default_unset"} | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0, res | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert res["code"] == 0, res | |||||
| assert res["data"][0]["avatar"] == original_data["avatar"], res | |||||
| assert res["data"][0]["description"] == original_data["description"], res | |||||
| assert res["data"][0]["embedding_model"] == original_data["embedding_model"], res | |||||
| assert res["data"][0]["permission"] == original_data["permission"], res | |||||
| assert res["data"][0]["chunk_method"] == original_data["chunk_method"], res | |||||
| assert res["data"][0]["pagerank"] == original_data["pagerank"], res | |||||
| assert res["data"][0]["parser_config"] == { | |||||
| "chunk_token_num": 128, | |||||
| "delimiter": r"\n", | |||||
| "html4excel": False, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, res |