### What problem does this PR solve?

Cover the [list chunks](https://ragflow.io/docs/v0.17.2/http_api_reference#list-chunks) endpoint.

### Type of change

- [x] Update test cases
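For reference, a minimal sketch of the endpoint these tests exercise, following the linked API reference. The base URL, API key, and IDs are placeholders, and this assumes a running RAGFlow instance; the suite itself goes through its own request helpers:

```python
import requests

BASE_URL = "http://localhost:9380"  # placeholder address of a RAGFlow deployment
API_KEY = "YOUR_API_KEY"            # placeholder
dataset_id, document_id = "DATASET_ID", "DOCUMENT_ID"  # placeholders

res = requests.get(
    f"{BASE_URL}/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks",
    headers={"Authorization": f"Bearer {API_KEY}"},
    params={"page": 1, "page_size": 30, "keywords": ""},  # optional filters; "id" selects a single chunk
).json()

# Response shape asserted throughout the new tests:
print(res["code"], len(res["data"]["chunks"]), res["data"]["doc"]["chunk_count"])
```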
| echo "Waiting for service to be available..." | echo "Waiting for service to be available..." | ||||
| sleep 5 | sleep 5 | ||||
| done | done | ||||
| cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && pytest -s --tb=short -m "not slow" | |||||
| cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && DOC_ENGINE=infinity pytest -s --tb=short -m "not slow" | |||||
| - name: Stop ragflow:nightly | - name: Stop ragflow:nightly | ||||
| if: always() # always run this step even if previous steps failed | if: always() # always run this step even if previous steps failed |
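With the HTTP API suite now invoked once per document engine in CI, engine-specific expectations are gated inside the tests themselves via the `DOC_ENGINE` environment variable (see the `skipif` marks in the new test file below). The pattern in isolation, as a minimal sketch:

```python
import os

import pytest

# An unset DOC_ENGINE is treated as Elasticsearch, matching the two CI
# invocations above; cases that only hold for one engine skip on the other.
@pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity does not support page_size=0")
def test_elasticsearch_only_case():
    assert os.getenv("DOC_ENGINE") in [None, "elasticsearch"]
```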
```diff
 def batch_add_chunks(auth, dataset_id, document_id, num):
     chunk_ids = []
     for i in range(num):
-        res = add_chunk(auth, dataset_id, document_id, {"content": f"ragflow test {i}"})
+        res = add_chunk(auth, dataset_id, document_id, {"content": f"chunk test {i}"})
         chunk_ids.append(res["data"]["chunk"]["id"])
     return chunk_ids
```
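The seeded content changes from `ragflow test {i}` to `chunk test {i}` so that keyword filtering in the new list tests can tell seeded chunks (matching "chunk") apart from the parsed document's own chunk (matching "ragflow"). A hypothetical call site, assuming the suite's fixtures supply `auth`, `dataset_id`, and `document_id`:

```python
from common import batch_add_chunks  # the helper above


def seed_chunks(auth, dataset_id, document_id):
    # Seeds four chunks named "chunk test 0".."chunk test 3" and returns their ids.
    chunk_ids = batch_add_chunks(auth, dataset_id, document_id, 4)
    assert len(chunk_ids) == 4
    return chunk_ids
```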
```diff
 import pytest
-from common import batch_create_datasets, bulk_upload_documents, delete_dataset, list_documnet, parse_documnet
+from common import add_chunk, batch_create_datasets, bulk_upload_documents, delete_dataset, list_documnet, parse_documnet
 from libs.utils import wait_for

 @pytest.fixture(scope="class")
-def get_dataset_id_and_document_id(get_http_api_auth, chunk_management_tmp_dir):
+def get_dataset_id_and_document_id(get_http_api_auth, chunk_management_tmp_dir, request):
+    def cleanup():
+        delete_dataset(get_http_api_auth)
+
+    request.addfinalizer(cleanup)
+
     dataset_ids = batch_create_datasets(get_http_api_auth, 1)
-    document_ids = bulk_upload_documents(get_http_api_auth, dataset_ids[0], 1, chunk_management_tmp_dir)
-    parse_documnet(get_http_api_auth, dataset_ids[0], {"document_ids": document_ids})
-    condition(get_http_api_auth, dataset_ids[0])
-    yield dataset_ids[0], document_ids[0]
-    delete_dataset(get_http_api_auth)
+    dataset_id = dataset_ids[0]
+    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, chunk_management_tmp_dir)
+    parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
+    condition(get_http_api_auth, dataset_id)
+    return dataset_id, document_ids[0]
+
+
+@pytest.fixture(scope="class")
+def add_chunks(get_http_api_auth, get_dataset_id_and_document_id):
+    dataset_id, document_id = get_dataset_id_and_document_id
+    chunk_ids = []
+    for i in range(4):
+        res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": f"chunk test {i}"})
+        chunk_ids.append(res["data"]["chunk"]["id"])
+
+    # issues/6487
+    from time import sleep
+
+    sleep(1)
+    yield dataset_id, document_id, chunk_ids
```
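Note the teardown change: cleanup is now registered with `request.addfinalizer` before any setup runs, so the dataset is deleted even if upload or parsing raises partway through, whereas code after a bare `yield` is skipped when setup fails. The pattern in isolation, as a minimal sketch:

```python
import pytest


@pytest.fixture(scope="class")
def resource(request):
    def cleanup():
        # Stand-in for delete_dataset(); runs even if the setup below raises.
        print("cleanup")

    request.addfinalizer(cleanup)  # registered before the fallible setup steps
    # ...create dataset, upload, parse; any failure here still triggers cleanup...
    return "resource-id"
```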
| {"content": 1}, | {"content": 1}, | ||||
| 100, | 100, | ||||
| """TypeError("unsupported operand type(s) for +: \'int\' and \'str\'")""", | """TypeError("unsupported operand type(s) for +: \'int\' and \'str\'")""", | ||||
| marks=pytest.mark.xfail, | |||||
| marks=pytest.mark.skip, | |||||
| ), | ), | ||||
| ({"content": "a"}, 0, ""), | ({"content": "a"}, 0, ""), | ||||
| ({"content": " "}, 102, "`content` is required"), | ({"content": " "}, 102, "`content` is required"), | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_message", | "payload, expected_code, expected_message", | ||||
| [ | [ | ||||
| ({"content": "a", "important_keywords": ["a", "b", "c"]}, 0, ""), | |||||
| ({"content": "a", "important_keywords": [""]}, 0, ""), | |||||
| ({"content": "chunk test", "important_keywords": ["a", "b", "c"]}, 0, ""), | |||||
| ({"content": "chunk test", "important_keywords": [""]}, 0, ""), | |||||
| ( | ( | ||||
| {"content": "a", "important_keywords": [1]}, | |||||
| {"content": "chunk test", "important_keywords": [1]}, | |||||
| 100, | 100, | ||||
| "TypeError('sequence item 0: expected str instance, int found')", | "TypeError('sequence item 0: expected str instance, int found')", | ||||
| ), | ), | ||||
| ({"content": "a", "important_keywords": ["a", "a"]}, 0, ""), | |||||
| ({"content": "a", "important_keywords": "abc"}, 102, "`important_keywords` is required to be a list"), | |||||
| ({"content": "a", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"), | |||||
| ({"content": "chunk test", "important_keywords": ["a", "a"]}, 0, ""), | |||||
| ({"content": "chunk test", "important_keywords": "abc"}, 102, "`important_keywords` is required to be a list"), | |||||
| ({"content": "chunk test", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"), | |||||
| ], | ], | ||||
| ) | ) | ||||
| def test_important_keywords(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | def test_important_keywords(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_message", | "payload, expected_code, expected_message", | ||||
| [ | [ | ||||
| ({"content": "a", "questions": ["a", "b", "c"]}, 0, ""), | |||||
| ({"content": "chunk test", "questions": ["a", "b", "c"]}, 0, ""), | |||||
| pytest.param( | pytest.param( | ||||
| {"content": "a", "questions": [""]}, | |||||
| {"content": "chunk test", "questions": [""]}, | |||||
| 0, | 0, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6404"), | |||||
| marks=pytest.mark.skip(reason="issues/6404"), | |||||
| ), | ), | ||||
| ({"content": "a", "questions": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"), | |||||
| ({"content": "a", "questions": ["a", "a"]}, 0, ""), | |||||
| ({"content": "a", "questions": "abc"}, 102, "`questions` is required to be a list"), | |||||
| ({"content": "a", "questions": 123}, 102, "`questions` is required to be a list"), | |||||
| ({"content": "chunk test", "questions": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"), | |||||
| ({"content": "chunk test", "questions": ["a", "a"]}, 0, ""), | |||||
| ({"content": "chunk test", "questions": "abc"}, 102, "`questions` is required to be a list"), | |||||
| ({"content": "chunk test", "questions": 123}, 102, "`questions` is required to be a list"), | |||||
| ], | ], | ||||
| ) | ) | ||||
| def test_questions(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | def test_questions(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | ||||
| ) | ) | ||||
| def test_invalid_document_id(self, get_http_api_auth, get_dataset_id_and_document_id, document_id, expected_code, expected_message): | def test_invalid_document_id(self, get_http_api_auth, get_dataset_id_and_document_id, document_id, expected_code, expected_message): | ||||
| dataset_id, _ = get_dataset_id_and_document_id | dataset_id, _ = get_dataset_id_and_document_id | ||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"}) | |||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_repeated_add_chunk(self, get_http_api_auth, get_dataset_id_and_document_id): | def test_repeated_add_chunk(self, get_http_api_auth, get_dataset_id_and_document_id): | ||||
| payload = {"content": "a"} | |||||
| payload = {"content": "chunk test"} | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | dataset_id, document_id = get_dataset_id_and_document_id | ||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| chunks_count = res["data"]["doc"]["chunk_count"] | chunks_count = res["data"]["doc"]["chunk_count"] | ||||
| def test_add_chunk_to_deleted_document(self, get_http_api_auth, get_dataset_id_and_document_id): | def test_add_chunk_to_deleted_document(self, get_http_api_auth, get_dataset_id_and_document_id): | ||||
| dataset_id, document_id = get_dataset_id_and_document_id | dataset_id, document_id = get_dataset_id_and_document_id | ||||
| delete_documnet(get_http_api_auth, dataset_id, {"ids": [document_id]}) | delete_documnet(get_http_api_auth, dataset_id, {"ids": [document_id]}) | ||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"}) | |||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"}) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == f"You don't own the document {document_id}." | assert res["message"] == f"You don't own the document {document_id}." | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| dataset_id, | dataset_id, | ||||
| document_id, | document_id, | ||||
| {"content": "a"}, | |||||
| {"content": f"chunk test {i}"}, | |||||
| ) | ) | ||||
| for i in range(chunk_num) | for i in range(chunk_num) | ||||
| ] | ] |
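The last fragment above comes from a concurrent-add test: the arguments are fanned out through a `ThreadPoolExecutor`, the same pattern `test_concurrent_list` in the new file uses. The fan-out in isolation, with a stand-in callable instead of the real API helper:

```python
from concurrent.futures import ThreadPoolExecutor


def fan_out(call, n):
    # Submit n API calls in parallel and collect the parsed responses.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(call, i) for i in range(n)]
    return [f.result() for f in futures]


responses = fan_out(lambda i: {"code": 0}, 100)  # stand-in for add_chunk/list_chunks
assert all(r["code"] == 0 for r in responses)
```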
The new `test_list_chunks.py` test file:

```python
#
#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import (
    INVALID_API_TOKEN,
    batch_add_chunks,
    list_chunks,
)
from libs.auth import RAGFlowHttpApiAuth


class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, get_dataset_id_and_document_id, auth, expected_code, expected_message):
        dataset_id, document_id = get_dataset_id_and_document_id
        res = list_chunks(auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestChunksList:
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page": None, "page_size": 2}, 0, 2, ""),
            pytest.param({"page": 0, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 1, ""),
            ({"page": "3", "page_size": 2}, 0, 1, ""),
            pytest.param({"page": -1, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            pytest.param({"page": "a", "page_size": 2}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page(self, get_http_api_auth, add_chunks, params, expected_code, expected_page_size, expected_message):
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page_size": None}, 0, 5, ""),
            pytest.param({"page_size": 0}, 0, 5, "", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity does not support page_size=0")),
            pytest.param({"page_size": 0}, 100, 0, "3013", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="Infinity does not support page_size=0")),
            ({"page_size": 1}, 0, 1, ""),
            ({"page_size": 6}, 0, 5, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param({"page_size": -1}, 0, 5, "", marks=pytest.mark.skip),
            pytest.param({"page_size": "a"}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page_size(self, get_http_api_auth, add_chunks, params, expected_code, expected_page_size, expected_message):
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "params, expected_page_size",
        [
            ({"keywords": None}, 5),
            ({"keywords": ""}, 5),
            ({"keywords": "1"}, 1),
            pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
            ({"keywords": "ragflow"}, 1),
            ({"keywords": "unknown"}, 0),
        ],
    )
    def test_keywords(self, get_http_api_auth, add_chunks, params, expected_page_size):
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == expected_page_size

    @pytest.mark.parametrize(
        "chunk_id, expected_code, expected_page_size, expected_message",
        [
            (None, 0, 5, ""),
            ("", 0, 5, ""),
            pytest.param(lambda r: r[0], 0, 1, "", marks=pytest.mark.skip(reason="issues/6499")),
            pytest.param("unknown", 102, 0, "You don't own the document unknown.txt.", marks=pytest.mark.skip(reason="issues/6500")),
        ],
    )
    def test_id(
        self,
        get_http_api_auth,
        add_chunks,
        chunk_id,
        expected_code,
        expected_page_size,
        expected_message,
    ):
        dataset_id, document_id, chunk_ids = add_chunks
        if callable(chunk_id):
            params = {"id": chunk_id(chunk_ids)}
        else:
            params = {"id": chunk_id}

        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["id"] in [None, ""]:
                assert len(res["data"]["chunks"]) == expected_page_size
            else:
                assert res["data"]["chunks"][0]["id"] == params["id"]
        else:
            assert res["message"] == expected_message

    def test_invalid_params(self, get_http_api_auth, add_chunks):
        dataset_id, document_id, _ = add_chunks
        params = {"a": "b"}
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 5

    def test_concurrent_list(self, get_http_api_auth, add_chunks):
        dataset_id, document_id, _ = add_chunks
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_chunks, get_http_api_auth, dataset_id, document_id) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)
        assert all(len(r["data"]["chunks"]) == 5 for r in responses)

    def test_default(self, get_http_api_auth, get_dataset_id_and_document_id):
        dataset_id, document_id = get_dataset_id_and_document_id
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        chunks_count = res["data"]["doc"]["chunk_count"]
        batch_add_chunks(get_http_api_auth, dataset_id, document_id, 31)
        # issues/6487
        from time import sleep

        sleep(3)
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 30
        assert res["data"]["doc"]["chunk_count"] == chunks_count + 31

    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, add_chunks, dataset_id, expected_code, expected_message):
        _, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 102, "The dataset not own the document chunks."),
            (
                "invalid_document_id",
                102,
                "You don't own the document invalid_document_id.",
            ),
        ],
    )
    def test_invalid_document_id(self, get_http_api_auth, add_chunks, document_id, expected_code, expected_message):
        dataset_id, _, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message
```
```diff
 @pytest.fixture(scope="class")
-def get_dataset_ids(get_http_api_auth):
-    ids = batch_create_datasets(get_http_api_auth, 5)
-    yield ids
-    delete_dataset(get_http_api_auth)
+def get_dataset_ids(get_http_api_auth, request):
+    def cleanup():
+        delete_dataset(get_http_api_auth)
+
+    request.addfinalizer(cleanup)
+
+    return batch_create_datasets(get_http_api_auth, 5)
```
```diff
                 100,
                 0,
                 "1064",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"page": "a", "page_size": 2},
                 100,
                 0,
                 """ValueError("invalid literal for int() with base 10: \'a\'")""",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )

                 100,
                 0,
                 "1064",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"page_size": "a"},
                 100,
                 0,
                 """ValueError("invalid literal for int() with base 10: \'a\'")""",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )

                 0,
                 lambda r: (is_sorted(r["data"]["docs"], "name", False)),
                 "",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"orderby": "unknown"},
                 102,
                 0,
                 "orderby should be create_time or update_time",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )

                 102,
                 0,
                 "desc should be true or false",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )
```
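The other recurring change in this PR is flipping known-issue markers from `xfail` to `skip`. `xfail` still executes the test body (and reports XPASS once the bug is fixed), while `skip` never runs it at all, which matters when the buggy path hangs or pollutes shared state. Side by side:

```python
import pytest


@pytest.mark.xfail(reason="issues/5851")  # body still executes; failure is expected
def test_marker_xfail():
    assert 1 + 1 == 3


@pytest.mark.skip(reason="issues/5851")  # body never executes
def test_marker_skip():
    assert 1 + 1 == 3
```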
```diff
                 100,
                 """AttributeError("\'NoneType\' object has no attribute \'strip\'")""",
             ),
-            pytest.param("", 102, "", marks=pytest.mark.xfail(reason="issue#5915")),
+            pytest.param("", 102, "", marks=pytest.mark.skip(reason="issue/5915")),
             ("dataset_1", 102, "Duplicated dataset name in updating dataset."),
             ("DATASET_1", 102, "Duplicated dataset name in updating dataset."),
         ],
```
```diff
 @pytest.fixture(scope="class")
-def get_dataset_id_and_document_ids(get_http_api_auth, file_management_tmp_dir):
+def get_dataset_id_and_document_ids(get_http_api_auth, file_management_tmp_dir, request):
+    def cleanup():
+        delete_dataset(get_http_api_auth)
+
+    request.addfinalizer(cleanup)
+
     dataset_ids = batch_create_datasets(get_http_api_auth, 1)
-    document_ids = bulk_upload_documents(get_http_api_auth, dataset_ids[0], 5, file_management_tmp_dir)
-    yield dataset_ids[0], document_ids
-    delete_dataset(get_http_api_auth)
+    dataset_id = dataset_ids[0]
+    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 5, file_management_tmp_dir)
+    return dataset_id, document_ids
```
```diff
                 100,
                 0,
                 "1064",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"page": "a", "page_size": 2},
                 100,
                 0,
                 """ValueError("invalid literal for int() with base 10: \'a\'")""",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )

                 100,
                 0,
                 "1064",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"page_size": "a"},
                 100,
                 0,
                 """ValueError("invalid literal for int() with base 10: \'a\'")""",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )

                 0,
                 lambda r: (is_sorted(r["data"]["docs"], "name", False)),
                 "",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"orderby": "unknown"},
                 102,
                 0,
                 "orderby should be create_time or update_time",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )

                 0,
                 lambda r: (is_sorted(r["data"]["docs"], "create_time", False)),
                 "",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
             pytest.param(
                 {"desc": "False"},
                 102,
                 0,
                 "desc should be true or false",
-                marks=pytest.mark.xfail(reason="issues/5851"),
+                marks=pytest.mark.skip(reason="issues/5851"),
             ),
         ],
     )
```
| {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"}, | {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"create_time": 1}, | {"create_time": 1}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"created_by": "ragflow_test"}, | {"created_by": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"dataset_id": "ragflow_test"}, | {"dataset_id": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"id": "ragflow_test"}, | {"id": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"location": "ragflow_test.txt"}, | {"location": "ragflow_test.txt"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"process_begin_at": 1}, | {"process_begin_at": 1}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"process_duation": 1.0}, | {"process_duation": 1.0}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param({"progress": 1.0}, 102, "Can't change `progress`."), | pytest.param({"progress": 1.0}, 102, "Can't change `progress`."), | ||||
| pytest.param( | pytest.param( | ||||
| {"progress_msg": "ragflow_test"}, | {"progress_msg": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"run": "ragflow_test"}, | {"run": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"size": 1}, | {"size": 1}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"source_type": "ragflow_test"}, | {"source_type": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"thumbnail": "ragflow_test"}, | {"thumbnail": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| ({"token_count": 1}, 102, "Can't change `token_count`."), | ({"token_count": 1}, 102, "Can't change `token_count`."), | ||||
| pytest.param( | pytest.param( | ||||
| {"type": "ragflow_test"}, | {"type": "ragflow_test"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"}, | {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| {"update_time": 1}, | {"update_time": 1}, | ||||
| 102, | 102, | ||||
| "The input parameters are invalid.", | "The input parameters are invalid.", | ||||
| marks=pytest.mark.xfail(reason="issues/6104"), | |||||
| marks=pytest.mark.skip(reason="issues/6104"), | |||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| {"chunk_token_num": -1}, | {"chunk_token_num": -1}, | ||||
| 100, | 100, | ||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"chunk_token_num": 0}, | {"chunk_token_num": 0}, | ||||
| 100, | 100, | ||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"chunk_token_num": 100000000}, | {"chunk_token_num": 100000000}, | ||||
| 100, | 100, | ||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"chunk_token_num": 3.14}, | {"chunk_token_num": 3.14}, | ||||
| 102, | 102, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"chunk_token_num": "1024"}, | {"chunk_token_num": "1024"}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| ( | ( | ||||
| "naive", | "naive", | ||||
| {"html4excel": 1}, | {"html4excel": 1}, | ||||
| 100, | 100, | ||||
| "AssertionError('html4excel should be True or False')", | "AssertionError('html4excel should be True or False')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| ("naive", {"delimiter": ""}, 0, ""), | ("naive", {"delimiter": ""}, 0, ""), | ||||
| ("naive", {"delimiter": "`##`"}, 0, ""), | ("naive", {"delimiter": "`##`"}, 0, ""), | ||||
| {"delimiter": 1}, | {"delimiter": 1}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"task_page_size": -1}, | {"task_page_size": -1}, | ||||
| 100, | 100, | ||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | "AssertionError('task_page_size should be in range from 1 to 100000000')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"task_page_size": 0}, | {"task_page_size": 0}, | ||||
| 100, | 100, | ||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | "AssertionError('task_page_size should be in range from 1 to 100000000')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"task_page_size": 100000000}, | {"task_page_size": 100000000}, | ||||
| 100, | 100, | ||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | "AssertionError('task_page_size should be in range from 1 to 100000000')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"task_page_size": 3.14}, | {"task_page_size": 3.14}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"task_page_size": "1024"}, | {"task_page_size": "1024"}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| ("naive", {"raptor": {"use_raptor": True}}, 0, ""), | ("naive", {"raptor": {"use_raptor": True}}, 0, ""), | ||||
| ("naive", {"raptor": {"use_raptor": False}}, 0, ""), | ("naive", {"raptor": {"use_raptor": False}}, 0, ""), | ||||
| {"invalid_key": "invalid_value"}, | {"invalid_key": "invalid_value"}, | ||||
| 100, | 100, | ||||
| """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""", | """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_keywords": -1}, | {"auto_keywords": -1}, | ||||
| 100, | 100, | ||||
| "AssertionError('auto_keywords should be in range from 0 to 32')", | "AssertionError('auto_keywords should be in range from 0 to 32')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_keywords": 32}, | {"auto_keywords": 32}, | ||||
| 100, | 100, | ||||
| "AssertionError('auto_keywords should be in range from 0 to 32')", | "AssertionError('auto_keywords should be in range from 0 to 32')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_questions": 3.14}, | {"auto_questions": 3.14}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_keywords": "1024"}, | {"auto_keywords": "1024"}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_questions": -1}, | {"auto_questions": -1}, | ||||
| 100, | 100, | ||||
| "AssertionError('auto_questions should be in range from 0 to 10')", | "AssertionError('auto_questions should be in range from 0 to 10')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_questions": 10}, | {"auto_questions": 10}, | ||||
| 100, | 100, | ||||
| "AssertionError('auto_questions should be in range from 0 to 10')", | "AssertionError('auto_questions should be in range from 0 to 10')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_questions": 3.14}, | {"auto_questions": 3.14}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"auto_questions": "1024"}, | {"auto_questions": "1024"}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"topn_tags": -1}, | {"topn_tags": -1}, | ||||
| 100, | 100, | ||||
| "AssertionError('topn_tags should be in range from 0 to 10')", | "AssertionError('topn_tags should be in range from 0 to 10')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"topn_tags": 10}, | {"topn_tags": 10}, | ||||
| 100, | 100, | ||||
| "AssertionError('topn_tags should be in range from 0 to 10')", | "AssertionError('topn_tags should be in range from 0 to 10')", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"topn_tags": 3.14}, | {"topn_tags": 3.14}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| pytest.param( | pytest.param( | ||||
| "naive", | "naive", | ||||
| {"topn_tags": "1024"}, | {"topn_tags": "1024"}, | ||||
| 100, | 100, | ||||
| "", | "", | ||||
| marks=pytest.mark.xfail(reason="issues/6098"), | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) |
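The `parser_config` cases above repeat one probe set per numeric field: a below-range value, both documented bounds, a float, and a numeric string. A condensed, hypothetical way to generate that matrix (the helper name and test ids are illustrative, not taken from the suite):

```python
import pytest


def boundary_cases(field, low, high):
    # Hypothetical generator mirroring the hand-written params above.
    return [
        pytest.param({field: low - 1}, id=f"{field}=below-min"),
        pytest.param({field: low}, id=f"{field}=min"),
        pytest.param({field: high}, id=f"{field}=max"),
        pytest.param({field: 3.14}, id=f"{field}=float"),
        pytest.param({field: "1024"}, id=f"{field}=string"),
    ]


@pytest.mark.parametrize("payload", boundary_cases("chunk_token_num", 1, 100000000))
def test_boundary_shape(payload):
    assert "chunk_token_num" in payload  # placeholder; the real tests POST to the API
```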