### What problem does this PR solve?

Cover the [update document endpoint](https://ragflow.io/docs/dev/http_api_reference#update-document) with HTTP API test cases.

### Type of change

- [x] Add test cases

7 月之前 · d1d651080a
--- a/sdk/python/test/test_http_api/common.py
+++ b/sdk/python/test/test_http_api/common.py
@@ -88,18 +88,25 @@ def upload_documnets(auth, dataset_id, files_path=None):
        files_path = []

    fields = []
    for i, fp in enumerate(files_path):
        p = Path(fp)
        fields.append(("file", (p.name, p.open("rb"))))
    m = MultipartEncoder(fields=fields)

    res = requests.post(
        url=url,
        headers={"Content-Type": m.content_type},
        auth=auth,
        data=m,
    )
    return res.json()
    file_objects = []
    try:
        for fp in files_path:
            p = Path(fp)
            f = p.open("rb")
            fields.append(("file", (p.name, f)))
            file_objects.append(f)
        m = MultipartEncoder(fields=fields)

        res = requests.post(
            url=url,
            headers={"Content-Type": m.content_type},
            auth=auth,
            data=m,
        )
        return res.json()
    finally:
        for f in file_objects:
            f.close()


 def batch_upload_documents(auth, dataset_id, num, tmp_path):
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_download_document.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_download_document.py
@@ -95,9 +95,8 @@ class TestDownloadDocument:
        )

    @pytest.mark.parametrize(
        "docment_id, expected_code, expected_message",
        "document_id, expected_code, expected_message",
        [
            pytest.param("", 0, "", marks=pytest.mark.xfail(reason="issue#6031")),
            (
                "invalid_document_id",
                102,
@@ -105,14 +104,14 @@ class TestDownloadDocument:
            ),
        ],
    )
    def test_invalid_docment_id(
        self, get_http_api_auth, tmp_path, docment_id, expected_code, expected_message
    def test_invalid_document_id(
        self, get_http_api_auth, tmp_path, document_id, expected_code, expected_message
    ):
        ids = create_datasets(get_http_api_auth, 1)
        res = download_document(
            get_http_api_auth,
            ids[0],
            docment_id,
            document_id,
            tmp_path / "ragflow_test_download_1.txt",
        )
        assert res.status_code == codes.ok
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_update_document.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_update_document.py
@@ -0,0 +1,576 @@
 #
 #  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #


 import pytest
 from common import (
    DOCUMENT_NAME_LIMIT,
    INVALID_API_TOKEN,
    batch_upload_documents,
    create_datasets,
    list_documnet,
    update_documnet,
 )
 from libs.auth import RAGFlowHttpApiAuth


 class TestAuthorization:
    """Authorization checks for the update-document HTTP call."""

    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(
        self,
        get_http_api_auth,
        tmp_path,
        auth,
        expected_code,
        expected_message,
    ):
        """Update a document with absent or invalid credentials.

        The dataset and document are created with the valid fixture
        credentials; only the update request itself is sent with ``auth``.
        """
        dataset_id = create_datasets(get_http_api_auth, 1)[0]
        uploaded = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
        payload = {"name": "auth_test.txt"}

        response = update_documnet(auth, dataset_id, uploaded[0], payload)

        assert response["code"] == expected_code
        assert response["message"] == expected_message


 class TestupdatedDocument:
    @pytest.mark.parametrize(
        "name, expected_code, expected_message",
        [
            ("new_name.txt", 0, ""),
            (
                f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt",
                101,
                "The name should be less than 128 bytes.",
            ),
            (
                0,
                100,
                """AttributeError("\'int\' object has no attribute \'encode\'")""",
            ),
            (
                None,
                100,
                """AttributeError("\'NoneType\' object has no attribute \'encode\'")""",
            ),
            (
                "",
                101,
                "The extension of file can't be changed",
            ),
            (
                "ragflow_test_upload_0",
                101,
                "The extension of file can't be changed",
            ),
            (
                "ragflow_test_upload_1.txt",
                102,
                "Duplicated document name in the same dataset.",
            ),
            (
                "RAGFLOW_TEST_UPLOAD_1.TXT",
                0,
                "",
            ),
        ],
    )
    def test_name(
        self,
        get_http_api_auth,
        tmp_path,
        name,
        expected_code,
        expected_message,
    ):
        """Rename the first of two uploaded documents and check the outcome.

        When ``expected_code`` is 0 the document is listed again and its
        stored name must equal ``name``; otherwise the message of the update
        response must equal ``expected_message``.
        """
        dataset_id = create_datasets(get_http_api_auth, 1)[0]
        # Two uploads: presumably the second one provides the name used by the
        # duplicate-name case -- confirm against batch_upload_documents.
        uploaded = batch_upload_documents(get_http_api_auth, dataset_id, 2, tmp_path)
        target_id = uploaded[0]

        update_res = update_documnet(
            get_http_api_auth, dataset_id, target_id, {"name": name}
        )
        assert update_res["code"] == expected_code

        if expected_code != 0:
            assert update_res["message"] == expected_message
            return

        listing = list_documnet(get_http_api_auth, dataset_id, {"id": target_id})
        assert listing["data"]["docs"][0]["name"] == name

    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_document_id",
                102,
                "The dataset doesn't own the document.",
            ),
        ],
    )
    def test_invalid_document_id(
        self,
        get_http_api_auth,
        document_id,
        expected_code,
        expected_message,
    ):
        """Send an update for a document id that this test never uploaded."""
        dataset_id = create_datasets(get_http_api_auth, 1)[0]
        payload = {"name": "new_name.txt"}

        response = update_documnet(get_http_api_auth, dataset_id, document_id, payload)

        assert response["code"] == expected_code
        assert response["message"] == expected_message

    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset.",
            ),
        ],
    )
    def test_invalid_dataset_id(
        self,
        get_http_api_auth,
        tmp_path,
        dataset_id,
        expected_code,
        expected_message,
    ):
        """Update a real document while addressing it through a bad dataset id."""
        # The document is uploaded into a genuine dataset; only the update
        # request uses the parametrized ``dataset_id``.
        real_dataset_id = create_datasets(get_http_api_auth, 1)[0]
        uploaded = batch_upload_documents(
            get_http_api_auth, real_dataset_id, 1, tmp_path
        )
        payload = {"name": "new_name.txt"}

        response = update_documnet(get_http_api_auth, dataset_id, uploaded[0], payload)

        assert response["code"] == expected_code
        assert response["message"] == expected_message

    @pytest.mark.parametrize(
        "meta_fields, expected_code, expected_message",
        [({"test": "test"}, 0, ""), ("test", 102, "meta_fields must be a dictionary")],
    )
    def test_meta_fields(
        self, get_http_api_auth, tmp_path, meta_fields, expected_code, expected_message
    ):
        """Update ``meta_fields`` on a freshly uploaded document.

        The update response code is always compared with ``expected_code``.
        On success (code 0) the document is listed again and its stored
        ``meta_fields`` must equal the submitted value; on failure the update
        response message must equal ``expected_message``.
        """
        ids = create_datasets(get_http_api_auth, 1)
        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
        res = update_documnet(
            get_http_api_auth, ids[0], document_ids[0], {"meta_fields": meta_fields}
        )
        # Previously expected_code only selected the branch below and was never
        # verified; assert it like the sibling tests in this class do.
        assert res["code"] == expected_code
        if expected_code == 0:
            res = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]})
            assert res["data"]["docs"][0]["meta_fields"] == meta_fields
        else:
            assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "chunk_method, expected_code, expected_message",
        [
            ("naive", 0, ""),
            ("manual", 0, ""),
            ("qa", 0, ""),
            pytest.param("table", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
            ("paper", 0, ""),
            ("book", 0, ""),
            ("laws", 0, ""),
            ("presentation", 0, ""),
            pytest.param(
                "picture", 0, "", marks=pytest.mark.xfail(reason="issues/6081")
            ),
            pytest.param("one", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
            ("knowledge_graph", 0, ""),
            pytest.param("email", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
            pytest.param("tag", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
            ("", 102, "`chunk_method`  doesn't exist"),
            (
                "other_chunk_method",
                102,
                "`chunk_method` other_chunk_method doesn't exist",
            ),
        ],
    )
    def test_chunk_method(
        self,
        get_http_api_auth,
        tmp_path,
        chunk_method,
        expected_code,
        expected_message,
    ):
        """Switch the chunk method of an uploaded document and check the result.

        On an expected code of 0 the document is listed again and its stored
        ``chunk_method`` must match the requested one (an empty request is
        compared against ``"naive"``); otherwise the update response message
        must equal ``expected_message``.
        """
        dataset_id = create_datasets(get_http_api_auth, 1)[0]
        uploaded = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
        target_id = uploaded[0]

        update_res = update_documnet(
            get_http_api_auth, dataset_id, target_id, {"chunk_method": chunk_method}
        )
        assert update_res["code"] == expected_code

        if expected_code != 0:
            assert update_res["message"] == expected_message
            return

        listing = list_documnet(get_http_api_auth, dataset_id, {"id": target_id})
        wanted_method = chunk_method if chunk_method != "" else "naive"
        assert listing["data"]["docs"][0]["chunk_method"] == wanted_method

    @pytest.mark.parametrize(
        "chunk_method, parser_config, expected_code, expected_message",
        [
            # Full and empty configurations.
            (
                "naive",
                {
                    "chunk_token_num": 128,
                    "layout_recognize": "DeepDOC",
                    "html4excel": False,
                    "delimiter": "\n!?。；！？",
                    "task_page_size": 12,
                    "raptor": {"use_raptor": False},
                },
                0,
                "",
            ),
            ("naive", {}, 0, ""),
            # chunk_token_num
            pytest.param(
                "naive",
                {"chunk_token_num": -1},
                100,
                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 0},
                100,
                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 100000000},
                100,
                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 3.14},
                102,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": "1024"},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # layout_recognize
            (
                "naive",
                {"layout_recognize": "DeepDOC"},
                0,
                "",
            ),
            (
                "naive",
                {"layout_recognize": "Naive"},
                0,
                "",
            ),
            # html4excel
            ("naive", {"html4excel": True}, 0, ""),
            ("naive", {"html4excel": False}, 0, ""),
            pytest.param(
                "naive",
                {"html4excel": 1},
                100,
                "AssertionError('html4excel should be True or False')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # delimiter
            ("naive", {"delimiter": ""}, 0, ""),
            ("naive", {"delimiter": "`##`"}, 0, ""),
            pytest.param(
                "naive",
                {"delimiter": 1},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # task_page_size
            pytest.param(
                "naive",
                {"task_page_size": -1},
                100,
                "AssertionError('task_page_size should be in range from 1 to 100000000')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 0},
                100,
                "AssertionError('task_page_size should be in range from 1 to 100000000')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 100000000},
                100,
                "AssertionError('task_page_size should be in range from 1 to 100000000')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 3.14},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": "1024"},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # raptor
            ("naive", {"raptor": {"use_raptor": True}}, 0, ""),
            ("naive", {"raptor": {"use_raptor": False}}, 0, ""),
            # unknown key
            pytest.param(
                "naive",
                {"invalid_key": "invalid_value"},
                100,
                """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # auto_keywords
            pytest.param(
                "naive",
                {"auto_keywords": -1},
                100,
                "AssertionError('auto_keywords should be in range from 0 to 32')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": 32},
                100,
                "AssertionError('auto_keywords should be in range from 0 to 32')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # Was {"auto_questions": 3.14}, a copy-paste duplicate of the case
            # in the auto_questions group that left auto_keywords untested
            # for float input.
            pytest.param(
                "naive",
                {"auto_keywords": 3.14},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": "1024"},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # auto_questions
            pytest.param(
                "naive",
                {"auto_questions": -1},
                100,
                "AssertionError('auto_questions should be in range from 0 to 10')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 10},
                100,
                "AssertionError('auto_questions should be in range from 0 to 10')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 3.14},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": "1024"},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            # topn_tags
            pytest.param(
                "naive",
                {"topn_tags": -1},
                100,
                "AssertionError('topn_tags should be in range from 0 to 10')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": 10},
                100,
                "AssertionError('topn_tags should be in range from 0 to 10')",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": 3.14},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": "1024"},
                100,
                "",
                marks=pytest.mark.xfail(reason="issues/6098"),
            ),
        ],
    )
    def test_parser_config(
        self,
        get_http_api_auth,
        tmp_path,
        chunk_method,
        parser_config,
        expected_code,
        expected_message,
    ):
        """Update ``chunk_method`` together with ``parser_config`` and verify it.

        The update response code must equal ``expected_code``. On failure the
        update response message must equal ``expected_message``. On success
        the document is listed again: every submitted key must be stored with
        the submitted value, and an empty submission must yield the default
        configuration spelled out below.
        """
        ids = create_datasets(get_http_api_auth, 1)
        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
        res = update_documnet(
            get_http_api_auth,
            ids[0],
            document_ids[0],
            {"chunk_method": chunk_method, "parser_config": parser_config},
        )
        assert res["code"] == expected_code

        if expected_code != 0:
            # Check the message on the update response itself; the original
            # performed this check after `res` could have been rebound to the
            # list response.
            assert res["message"] == expected_message
            return

        listed = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]})
        stored_config = listed["data"]["docs"][0]["parser_config"]
        if parser_config:
            for key, value in parser_config.items():
                assert stored_config[key] == value
        else:
            assert stored_config == {
                "chunk_token_num": 128,
                "delimiter": "\\n!?;。；！？",
                "html4excel": False,
                "layout_recognize": "DeepDOC",
                "raptor": {"use_raptor": False},
            }

    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"chunk_count": 1}, 102, "Can't change `chunk_count`."),
            pytest.param(
                {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"create_time": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"created_by": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"dataset_id": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"id": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"location": "ragflow_test.txt"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"process_begin_at": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"process_duation": 1.0},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param({"progress": 1.0}, 102, "Can't change `progress`."),
            pytest.param(
                {"progress_msg": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"run": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"size": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"source_type": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"thumbnail": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            ({"token_count": 1}, 102, "Can't change `token_count`."),
            pytest.param(
                {"type": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
            pytest.param(
                {"update_time": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.xfail(reason="issues/6104"),
            ),
        ],
    )
    def test_invalid_field(
        self, get_http_api_auth, tmp_path, payload, expected_code, expected_message
    ):
        """Submit a payload holding a single field this test expects to be rejected.

        Both the code and the message of the update response are compared
        with the parametrized expectations.
        """
        dataset_id = create_datasets(get_http_api_auth, 1)[0]
        uploaded = batch_upload_documents(get_http_api_auth, dataset_id, 2, tmp_path)

        response = update_documnet(get_http_api_auth, dataset_id, uploaded[0], payload)

        assert response["code"] == expected_code
        assert response["message"] == expected_message