### What problem does this PR solve?

Refactor test fixtures and test cases.

### Type of change

- [ ] Refactoring test cases
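The refactor's main move is replacing per-test `batch_create_datasets(...)` / `delete_dataset(...)` calls with scoped pytest fixtures that create resources up front and register cleanup through `request.addfinalizer`, as the hunks below show. A minimal, self-contained sketch of that pattern; the `create_resources` / `delete_resources` helpers here are stand-ins, not the suite's real HTTP helpers:

```python
import pytest


# Placeholder helpers standing in for the real HTTP helpers in common.py
# (batch_create_datasets / delete_datasets); they do not call any API.
def create_resources(num):
    return [f"resource_{i}" for i in range(num)]


def delete_resources(ids=None):
    # The real helper issues an HTTP DELETE; this stub only marks the hook.
    return None


@pytest.fixture(scope="class")
def add_resources(request):
    # Created once per test class and cleaned up after its last test.
    def cleanup():
        delete_resources()

    request.addfinalizer(cleanup)
    return create_resources(5)


@pytest.fixture(scope="function")
def add_resources_func(request):
    # A fresh set for every test that deletes or mutates resources.
    def cleanup():
        delete_resources()

    request.addfinalizer(cleanup)
    return create_resources(3)


class TestListResources:
    def test_default(self, add_resources):
        assert len(add_resources) == 5
```

Class scope serves read-only tests (listing, retrieval), while function scope serves tests that delete or mutate, mirroring the `add_datasets` / `add_datasets_func` split introduced below.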
| # | # | ||||
| import os | import os | ||||
| import pytest | import pytest | ||||
| import requests | import requests | ||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380') | |||||
| HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380") | |||||
| # def generate_random_email(): | # def generate_random_email(): | ||||
| # return 'user_' + ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))+'@1.com' | # return 'user_' + ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))+'@1.com' | ||||
| def generate_email(): | def generate_email(): | ||||
| return 'user_123@1.com' | |||||
| return "user_123@1.com" | |||||
| EMAIL = generate_email() | EMAIL = generate_email() | ||||
| # password is "123" | # password is "123" | ||||
| PASSWORD = '''ctAseGvejiaSWWZ88T/m4FQVOpQyUvP+x7sXtdv3feqZACiQleuewkUi35E16wSd5C5QcnkkcV9cYc8TKPTRZlxappDuirxghxoOvFcJxFU4ixLsD | |||||
| PASSWORD = """ctAseGvejiaSWWZ88T/m4FQVOpQyUvP+x7sXtdv3feqZACiQleuewkUi35E16wSd5C5QcnkkcV9cYc8TKPTRZlxappDuirxghxoOvFcJxFU4ixLsD | |||||
| fN33jCHRoDUW81IH9zjij/vaw8IbVyb6vuwg6MX6inOEBRRzVbRYxXOu1wkWY6SsI8X70oF9aeLFp/PzQpjoe/YbSqpTq8qqrmHzn9vO+yvyYyvmDsphXe | fN33jCHRoDUW81IH9zjij/vaw8IbVyb6vuwg6MX6inOEBRRzVbRYxXOu1wkWY6SsI8X70oF9aeLFp/PzQpjoe/YbSqpTq8qqrmHzn9vO+yvyYyvmDsphXe | ||||
| X8f7fp9c7vUsfOCkM+gHY3PadG+QHa7KI7mzTKgUTZImK6BZtfRBATDTthEUbbaTewY4H0MnWiCeeDhcbeQao6cFy1To8pE3RpmxnGnS8BsBn8w==''' | |||||
| X8f7fp9c7vUsfOCkM+gHY3PadG+QHa7KI7mzTKgUTZImK6BZtfRBATDTthEUbbaTewY4H0MnWiCeeDhcbeQao6cFy1To8pE3RpmxnGnS8BsBn8w==""" | |||||
| def register(): | def register(): | ||||
| @pytest.fixture(scope="session") | @pytest.fixture(scope="session") | ||||
| def get_http_api_auth(get_api_key_fixture): | def get_http_api_auth(get_api_key_fixture): | ||||
| return RAGFlowHttpApiAuth(get_api_key_fixture) | return RAGFlowHttpApiAuth(get_api_key_fixture) | ||||
| def get_my_llms(auth, name): | |||||
| url = HOST_ADDRESS + "/v1/llm/my_llms" | |||||
| authorization = {"Authorization": auth} | |||||
| response = requests.get(url=url, headers=authorization) | |||||
| res = response.json() | |||||
| if res.get("code") != 0: | |||||
| raise Exception(res.get("message")) | |||||
| if name in res.get("data"): | |||||
| return True | |||||
| return False | |||||
| def add_models(auth): | |||||
| url = HOST_ADDRESS + "/v1/llm/set_api_key" | |||||
| authorization = {"Authorization": auth} | |||||
| models_info = { | |||||
| "ZHIPU-AI": {"llm_factory": "ZHIPU-AI", "api_key": "d06253dacd404180aa8afb096fcb6c30.KatwBIUpvCSml9sU"}, | |||||
| } | |||||
| for name, model_info in models_info.items(): | |||||
| if not get_my_llms(auth, name): | |||||
| response = requests.post(url=url, headers=authorization, json=model_info) | |||||
| res = response.json() | |||||
| if res.get("code") != 0: | |||||
| raise Exception(res.get("message")) | |||||
| def get_tenant_info(auth): | |||||
| url = HOST_ADDRESS + "/v1/user/tenant_info" | |||||
| authorization = {"Authorization": auth} | |||||
| response = requests.get(url=url, headers=authorization) | |||||
| res = response.json() | |||||
| if res.get("code") != 0: | |||||
| raise Exception(res.get("message")) | |||||
| return res["data"].get("tenant_id") | |||||
| @pytest.fixture(scope="session", autouse=True) | |||||
| def set_tenant_info(get_auth): | |||||
| auth = get_auth | |||||
| try: | |||||
| add_models(auth) | |||||
| tenant_id = get_tenant_info(auth) | |||||
| except Exception as e: | |||||
| raise Exception(e) | |||||
| url = HOST_ADDRESS + "/v1/user/set_tenant_info" | |||||
| authorization = {"Authorization": get_auth} | |||||
| tenant_info = { | |||||
| "tenant_id": tenant_id, | |||||
| "llm_id": "glm-4-flash@ZHIPU-AI", | |||||
| "embd_id": "embedding-3@ZHIPU-AI", | |||||
| "img2txt_id": "glm-4v@ZHIPU-AI", | |||||
| "asr_id": "", | |||||
| "tts_id": None, | |||||
| } | |||||
| response = requests.post(url=url, headers=authorization, json=tenant_info) | |||||
| res = response.json() | |||||
| if res.get("code") != 0: | |||||
| raise Exception(res.get("message")) | 
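The fixtures above wrap the session API key in `RAGFlowHttpApiAuth` and hand it to `requests` through the `auth=` parameter. Its implementation lives in `libs/auth.py` and is not part of this diff; presumably it is a small `requests.auth.AuthBase` subclass along these lines (a sketch under that assumption; the exact header scheme is also assumed):

```python
import requests


class RAGFlowHttpApiAuth(requests.auth.AuthBase):
    """Attach an Authorization header to every outgoing request.

    Sketch only -- the real class in libs/auth.py may differ in detail.
    """

    def __init__(self, api_key):
        self.api_key = api_key

    def __call__(self, r):
        # Assumed bearer-token scheme; the project may use a different format.
        r.headers["Authorization"] = f"Bearer {self.api_key}"
        return r
```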
| FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents" | FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents" | ||||
| FILE_CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/chunks" | FILE_CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/chunks" | ||||
| CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks" | CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks" | ||||
| CHAT_ASSISTANT_API_URL = "/api/v1/chats" | |||||
| INVALID_API_TOKEN = "invalid_key_123" | INVALID_API_TOKEN = "invalid_key_123" | ||||
| DATASET_NAME_LIMIT = 128 | DATASET_NAME_LIMIT = 128 | ||||
| return res.json() | return res.json() | ||||
| def list_dataset(auth, params=None): | |||||
| def list_datasets(auth, params=None): | |||||
| res = requests.get(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=HEADERS, auth=auth, params=params) | res = requests.get(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=HEADERS, auth=auth, params=params) | ||||
| return res.json() | return res.json() | ||||
| return res.json() | return res.json() | ||||
| def delete_dataset(auth, payload=None): | |||||
| def delete_datasets(auth, payload=None): | |||||
| res = requests.delete(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=HEADERS, auth=auth, json=payload) | res = requests.delete(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=HEADERS, auth=auth, json=payload) | ||||
| return res.json() | return res.json() | ||||
| return res | return res | ||||
| def list_documnet(auth, dataset_id, params=None): | |||||
| def list_documnets(auth, dataset_id, params=None): | |||||
| url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id) | url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id) | ||||
| res = requests.get(url=url, headers=HEADERS, auth=auth, params=params) | res = requests.get(url=url, headers=HEADERS, auth=auth, params=params) | ||||
| return res.json() | return res.json() | ||||
| return res.json() | return res.json() | ||||
| def delete_documnet(auth, dataset_id, payload=None): | |||||
| def delete_documnets(auth, dataset_id, payload=None): | |||||
| url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id) | url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id) | ||||
| res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload) | res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload) | ||||
| return res.json() | return res.json() | ||||
| def parse_documnet(auth, dataset_id, payload=None): | |||||
| def parse_documnets(auth, dataset_id, payload=None): | |||||
| url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id) | url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id) | ||||
| res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload) | res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload) | ||||
| return res.json() | return res.json() | ||||
| def stop_parse_documnet(auth, dataset_id, payload=None): | |||||
| def stop_parse_documnets(auth, dataset_id, payload=None): | |||||
| url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id) | url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id) | ||||
| res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload) | res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload) | ||||
| return res.json() | return res.json() | ||||
| res = add_chunk(auth, dataset_id, document_id, {"content": f"chunk test {i}"}) | res = add_chunk(auth, dataset_id, document_id, {"content": f"chunk test {i}"}) | ||||
| chunk_ids.append(res["data"]["chunk"]["id"]) | chunk_ids.append(res["data"]["chunk"]["id"]) | ||||
| return chunk_ids | return chunk_ids | ||||
| # CHAT ASSISTANT MANAGEMENT | |||||
| def create_chat_assistant(auth, payload=None): | |||||
| url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}" | |||||
| res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload) | |||||
| return res.json() | |||||
| def list_chat_assistants(auth, params=None): | |||||
| url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}" | |||||
| res = requests.get(url=url, headers=HEADERS, auth=auth, params=params) | |||||
| return res.json() | |||||
| def update_chat_assistant(auth, chat_assistant_id, payload=None): | |||||
| url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}/{chat_assistant_id}" | |||||
| res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload) | |||||
| return res.json() | |||||
| def delete_chat_assistants(auth, payload=None): | |||||
| url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}" | |||||
| res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload) | |||||
| return res.json() | |||||
| def batch_create_chat_assistants(auth, num): | |||||
| chat_assistant_ids = [] | |||||
| for i in range(num): | |||||
| res = create_chat_assistant(auth, {"name": f"test_chat_assistant_{i}"}) | |||||
| chat_assistant_ids.append(res["data"]["id"]) | |||||
| return chat_assistant_ids | 
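The helpers in common.py lean on module-level constants such as `HOST_ADDRESS`, `HEADERS`, and `DATASETS_API_URL` that sit outside the visible hunks. A rough sketch of what those definitions presumably look like, plus the call pattern a test would use for the new chat-assistant helpers (both inferred from the calls above, not from the file's actual contents):

```python
import os

# Presumed module-level definitions in common.py (not shown in the hunks above).
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
HEADERS = {"Content-Type": "application/json"}
DATASETS_API_URL = "/api/v1/datasets"

# Typical call pattern from a test, with `auth` being the get_http_api_auth fixture:
#   res = create_chat_assistant(auth, {"name": "demo_assistant"})
#   assert res["code"] == 0
#   ids = batch_create_chat_assistants(auth, 3)
#   delete_chat_assistants(auth, {"ids": ids})
```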
| # limitations under the License. | # limitations under the License. | ||||
| # | # | ||||
| import pytest | import pytest | ||||
| from common import delete_dataset | |||||
| from common import batch_create_datasets, bulk_upload_documents, delete_datasets | |||||
| from libs.utils.file_utils import ( | from libs.utils.file_utils import ( | ||||
| create_docx_file, | create_docx_file, | ||||
| create_eml_file, | create_eml_file, | ||||
| @pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
| def clear_datasets(get_http_api_auth): | def clear_datasets(get_http_api_auth): | ||||
| yield | yield | ||||
| delete_dataset(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth) | |||||
| @pytest.fixture | @pytest.fixture | ||||
| creator_func(file_path) | creator_func(file_path) | ||||
| files[file_type] = file_path | files[file_type] = file_path | ||||
| return files | return files | ||||
| @pytest.fixture(scope="class") | |||||
| def ragflow_tmp_dir(request, tmp_path_factory): | |||||
| class_name = request.cls.__name__ | |||||
| return tmp_path_factory.mktemp(class_name) | |||||
| @pytest.fixture(scope="class") | |||||
| def add_dataset(request, get_http_api_auth): | |||||
| def cleanup(): | |||||
| delete_datasets(get_http_api_auth) | |||||
| request.addfinalizer(cleanup) | |||||
| dataset_ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| return dataset_ids[0] | |||||
| @pytest.fixture(scope="function") | |||||
| def add_dataset_func(request, get_http_api_auth): | |||||
| def cleanup(): | |||||
| delete_datasets(get_http_api_auth) | |||||
| request.addfinalizer(cleanup) | |||||
| dataset_ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| return dataset_ids[0] | |||||
| @pytest.fixture(scope="class") | |||||
| def add_document(get_http_api_auth, add_dataset, ragflow_tmp_dir): | |||||
| dataset_id = add_dataset | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, ragflow_tmp_dir) | |||||
| return dataset_id, document_ids[0] | 
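`add_dataset` (class scope) and `add_dataset_func` (function scope) share a body and differ only in lifetime: read-only test classes reuse one dataset, while destructive tests get a fresh one per test. An illustrative test consuming the chained `add_document` fixture might look like this (the assertion follows the `data.docs` response shape used elsewhere in the suite):

```python
from common import list_documnets


class TestDocumentUpload:
    def test_uploaded_document_is_listed(self, get_http_api_auth, add_document):
        # add_document yields (dataset_id, document_id) for a one-document dataset.
        dataset_id, _ = add_document
        res = list_documnets(get_http_api_auth, dataset_id)
        assert res["code"] == 0
        assert len(res["data"]["docs"]) == 1
```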
| import pytest | import pytest | ||||
| from common import add_chunk, batch_create_datasets, bulk_upload_documents, delete_chunks, delete_dataset, list_documnet, parse_documnet | |||||
| from common import add_chunk, delete_chunks, list_documnets, parse_documnets | |||||
| from libs.utils import wait_for | from libs.utils import wait_for | ||||
| @wait_for(10, 1, "Document parsing timeout") | @wait_for(10, 1, "Document parsing timeout") | ||||
| def condition(_auth, _dataset_id): | def condition(_auth, _dataset_id): | ||||
| res = list_documnet(_auth, _dataset_id) | |||||
| res = list_documnets(_auth, _dataset_id) | |||||
| for doc in res["data"]["docs"]: | for doc in res["data"]["docs"]: | ||||
| if doc["run"] != "DONE": | if doc["run"] != "DONE": | ||||
| return False | return False | ||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def chunk_management_tmp_dir(tmp_path_factory): | |||||
| return tmp_path_factory.mktemp("chunk_management") | |||||
| @pytest.fixture(scope="class") | |||||
| def get_dataset_id_and_document_id(get_http_api_auth, chunk_management_tmp_dir, request): | |||||
| def cleanup(): | |||||
| delete_dataset(get_http_api_auth) | |||||
| request.addfinalizer(cleanup) | |||||
| dataset_ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = dataset_ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, chunk_management_tmp_dir) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| def add_chunks(get_http_api_auth, add_document): | |||||
| dataset_id, document_id = add_document | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": [document_id]}) | |||||
| condition(get_http_api_auth, dataset_id) | condition(get_http_api_auth, dataset_id) | ||||
| return dataset_id, document_ids[0] | |||||
| @pytest.fixture(scope="class") | |||||
| def add_chunks(get_http_api_auth, get_dataset_id_and_document_id): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| chunk_ids = [] | chunk_ids = [] | ||||
| for i in range(4): | for i in range(4): | ||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": f"chunk test {i}"}) | res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": f"chunk test {i}"}) | ||||
| @pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
| def add_chunks_func(get_http_api_auth, get_dataset_id_and_document_id, request): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| def add_chunks_func(request, get_http_api_auth, add_document): | |||||
| dataset_id, document_id = add_document | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": [document_id]}) | |||||
| condition(get_http_api_auth, dataset_id) | |||||
| chunk_ids = [] | chunk_ids = [] | ||||
| for i in range(4): | for i in range(4): | 
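`condition` is wrapped in `@wait_for(10, 1, "Document parsing timeout")` from `libs.utils`, whose implementation the diff does not show. Presumably it is a poll-and-retry decorator roughly like the following sketch (an assumption, not the project's code):

```python
import time
from functools import wraps


def wait_for(timeout, interval, error_message):
    """Re-invoke the wrapped predicate every `interval` seconds until it
    returns a truthy value or `timeout` seconds have elapsed."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                result = func(*args, **kwargs)
                if result:
                    return result
                time.sleep(interval)
            raise TimeoutError(error_message)

        return wrapper

    return decorator
```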
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import INVALID_API_TOKEN, add_chunk, delete_documnet, list_chunks | |||||
| from common import INVALID_API_TOKEN, add_chunk, delete_documnets, list_chunks | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | def test_invalid_auth(self, auth, expected_code, expected_message): | ||||
| res = add_chunk(auth, "dataset_id", "document_id", {}) | |||||
| res = add_chunk(auth, "dataset_id", "document_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| ({"content": "\n!?。;!?\"'"}, 0, ""), | ({"content": "\n!?。;!?\"'"}, 0, ""), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_content(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| def test_content(self, get_http_api_auth, add_document, payload, expected_code, expected_message): | |||||
| dataset_id, document_id = add_document | |||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert False, res | assert False, res | ||||
| ({"content": "chunk test", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"), | ({"content": "chunk test", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_important_keywords(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| def test_important_keywords(self, get_http_api_auth, add_document, payload, expected_code, expected_message): | |||||
| dataset_id, document_id = add_document | |||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert False, res | assert False, res | ||||
| ({"content": "chunk test", "questions": 123}, 102, "`questions` is required to be a list"), | ({"content": "chunk test", "questions": 123}, 102, "`questions` is required to be a list"), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_questions(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| def test_questions(self, get_http_api_auth, add_document, payload, expected_code, expected_message): | |||||
| dataset_id, document_id = add_document | |||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert False, res | assert False, res | ||||
| def test_invalid_dataset_id( | def test_invalid_dataset_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_id, | |||||
| add_document, | |||||
| dataset_id, | dataset_id, | ||||
| expected_code, | expected_code, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| _, document_id = get_dataset_id_and_document_id | |||||
| _, document_id = add_document | |||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"}) | res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_document_id(self, get_http_api_auth, get_dataset_id_and_document_id, document_id, expected_code, expected_message): | |||||
| dataset_id, _ = get_dataset_id_and_document_id | |||||
| def test_invalid_document_id(self, get_http_api_auth, add_document, document_id, expected_code, expected_message): | |||||
| dataset_id, _ = add_document | |||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"}) | res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_repeated_add_chunk(self, get_http_api_auth, get_dataset_id_and_document_id): | |||||
| def test_repeated_add_chunk(self, get_http_api_auth, add_document): | |||||
| payload = {"content": "chunk test"} | payload = {"content": "chunk test"} | ||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| dataset_id, document_id = add_document | |||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert False, res | assert False, res | ||||
| assert False, res | assert False, res | ||||
| assert res["data"]["doc"]["chunk_count"] == chunks_count + 2 | assert res["data"]["doc"]["chunk_count"] == chunks_count + 2 | ||||
| def test_add_chunk_to_deleted_document(self, get_http_api_auth, get_dataset_id_and_document_id): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| delete_documnet(get_http_api_auth, dataset_id, {"ids": [document_id]}) | |||||
| def test_add_chunk_to_deleted_document(self, get_http_api_auth, add_document): | |||||
| dataset_id, document_id = add_document | |||||
| delete_documnets(get_http_api_auth, dataset_id, {"ids": [document_id]}) | |||||
| res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"}) | res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"}) | ||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == f"You don't own the document {document_id}." | assert res["message"] == f"You don't own the document {document_id}." | ||||
| @pytest.mark.skip(reason="issues/6411") | @pytest.mark.skip(reason="issues/6411") | ||||
| def test_concurrent_add_chunk(self, get_http_api_auth, get_dataset_id_and_document_id): | |||||
| def test_concurrent_add_chunk(self, get_http_api_auth, add_document): | |||||
| chunk_num = 50 | chunk_num = 50 | ||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| dataset_id, document_id = add_document | |||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert False, res | assert False, res | 
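`test_concurrent_add_chunk` (skipped pending issues/6411) submits 50 `add_chunk` calls through a `ThreadPoolExecutor` and then verifies the document's chunk count. Its body is not fully visible here; the general shape is presumably close to this sketch:

```python
from concurrent.futures import ThreadPoolExecutor

from common import add_chunk


def add_chunks_concurrently(auth, dataset_id, document_id, chunk_num=50):
    # Fire chunk_num add_chunk calls in parallel and collect the JSON replies.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(add_chunk, auth, dataset_id, document_id, {"content": f"chunk test {i}"})
            for i in range(chunk_num)
        ]
    responses = [f.result() for f in futures]
    assert all(r["code"] == 0 for r in responses)
    return responses
```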
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| class TestChunkstDeletion: | |||||
| class TestChunksDeletion: | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "dataset_id, expected_code, expected_message", | "dataset_id, expected_code, expected_message", | ||||
| [ | [ | ||||
| "document_id, expected_code, expected_message", | "document_id, expected_code, expected_message", | ||||
| [ | [ | ||||
| ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"), | ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"), | ||||
| pytest.param( | |||||
| "invalid_document_id", | |||||
| 100, | |||||
| "LookupError('Document not found which is supposed to be there')", | |||||
| marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6611"), | |||||
| ), | |||||
| pytest.param( | |||||
| "invalid_document_id", | |||||
| 100, | |||||
| "rm_chunk deleted chunks 0, expect 4", | |||||
| marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="issues/6611"), | |||||
| ), | |||||
| ("invalid_document_id", 100, """LookupError("Can't find the document with ID invalid_document_id!")"""), | |||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_document_id(self, get_http_api_auth, add_chunks_func, document_id, expected_code, expected_message): | def test_invalid_document_id(self, get_http_api_auth, add_chunks_func, document_id, expected_code, expected_message): | ||||
| dataset_id, _, chunk_ids = add_chunks_func | dataset_id, _, chunk_ids = add_chunks_func | ||||
| res = delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids}) | res = delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| #assert res["message"] == expected_message | |||||
| assert res["message"] == expected_message | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload", | "payload", | 
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import ( | |||||
| INVALID_API_TOKEN, | |||||
| batch_add_chunks, | |||||
| list_chunks, | |||||
| ) | |||||
| from common import INVALID_API_TOKEN, batch_add_chunks, list_chunks | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| assert all(r["code"] == 0 for r in responses) | assert all(r["code"] == 0 for r in responses) | ||||
| assert all(len(r["data"]["chunks"]) == 5 for r in responses) | assert all(len(r["data"]["chunks"]) == 5 for r in responses) | ||||
| def test_default(self, get_http_api_auth, get_dataset_id_and_document_id): | |||||
| dataset_id, document_id = get_dataset_id_and_document_id | |||||
| def test_default(self, get_http_api_auth, add_document): | |||||
| dataset_id, document_id = add_document | |||||
| res = list_chunks(get_http_api_auth, dataset_id, document_id) | res = list_chunks(get_http_api_auth, dataset_id, document_id) | ||||
| chunks_count = res["data"]["doc"]["chunk_count"] | chunks_count = res["data"]["doc"]["chunk_count"] | ||||
| batch_add_chunks(get_http_api_auth, dataset_id, document_id, 31) | batch_add_chunks(get_http_api_auth, dataset_id, document_id, 31) | 
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | # limitations under the License. | ||||
| # | # | ||||
| import os | import os | ||||
| import pytest | import pytest | ||||
| ({"question": "chunk"}, 102, 0, "`dataset_ids` is required."), | ({"question": "chunk"}, 102, 0, "`dataset_ids` is required."), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_basic_scenarios( | |||||
| self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message | |||||
| ): | |||||
| def test_basic_scenarios(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message): | |||||
| dataset_id, document_id, _ = add_chunks | dataset_id, document_id, _ = add_chunks | ||||
| if "dataset_ids" in payload: | if "dataset_ids" in payload: | ||||
| payload["dataset_ids"] = [dataset_id] | payload["dataset_ids"] = [dataset_id] | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_page_size( | |||||
| self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message | |||||
| ): | |||||
| def test_page_size(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message): | |||||
| dataset_id, _, _ = add_chunks | dataset_id, _, _ = add_chunks | ||||
| payload.update({"question": "chunk", "dataset_ids": [dataset_id]}) | payload.update({"question": "chunk", "dataset_ids": [dataset_id]}) | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_vector_similarity_weight( | |||||
| self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message | |||||
| ): | |||||
| def test_vector_similarity_weight(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message): | |||||
| dataset_id, _, _ = add_chunks | dataset_id, _, _ = add_chunks | ||||
| payload.update({"question": "chunk", "dataset_ids": [dataset_id]}) | payload.update({"question": "chunk", "dataset_ids": [dataset_id]}) | ||||
| res = retrieval_chunks(get_http_api_auth, payload) | res = retrieval_chunks(get_http_api_auth, payload) | ||||
| "payload, expected_code, expected_message", | "payload, expected_code, expected_message", | ||||
| [ | [ | ||||
| ({"rerank_id": "BAAI/bge-reranker-v2-m3"}, 0, ""), | ({"rerank_id": "BAAI/bge-reranker-v2-m3"}, 0, ""), | ||||
| pytest.param( | |||||
| {"rerank_id": "unknown"}, 100, "LookupError('Model(unknown) not authorized')", marks=pytest.mark.skip | |||||
| ), | |||||
| pytest.param({"rerank_id": "unknown"}, 100, "LookupError('Model(unknown) not authorized')", marks=pytest.mark.skip), | |||||
| ], | ], | ||||
| ) | ) | ||||
| def test_rerank_id(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message): | def test_rerank_id(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message): | ||||
| else: | else: | ||||
| assert expected_message in res["message"] | assert expected_message in res["message"] | ||||
| @pytest.mark.skip(reason="chat model is not set") | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_page_size, expected_message", | "payload, expected_code, expected_page_size, expected_message", | ||||
| [ | [ | ||||
| pytest.param({"highlight": None}, 0, False, "", marks=pytest.mark.skip(reason="issues/6648")), | pytest.param({"highlight": None}, 0, False, "", marks=pytest.mark.skip(reason="issues/6648")), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_highlight( | |||||
| self, get_http_api_auth, add_chunks, payload, expected_code, expected_highlight, expected_message | |||||
| ): | |||||
| def test_highlight(self, get_http_api_auth, add_chunks, payload, expected_code, expected_highlight, expected_message): | |||||
| dataset_id, _, _ = add_chunks | dataset_id, _, _ = add_chunks | ||||
| payload.update({"question": "chunk", "dataset_ids": [dataset_id]}) | payload.update({"question": "chunk", "dataset_ids": [dataset_id]}) | ||||
| res = retrieval_chunks(get_http_api_auth, payload) | res = retrieval_chunks(get_http_api_auth, payload) | ||||
| res = retrieval_chunks(get_http_api_auth, payload) | res = retrieval_chunks(get_http_api_auth, payload) | ||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]["chunks"]) == 4 | assert len(res["data"]["chunks"]) == 4 | ||||
| def test_concurrent_retrieval(self, get_http_api_auth, add_chunks): | |||||
| from concurrent.futures import ThreadPoolExecutor | |||||
| dataset_id, _, _ = add_chunks | |||||
| payload = {"question": "chunk", "dataset_ids": [dataset_id]} | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [executor.submit(retrieval_chunks, get_http_api_auth, payload) for i in range(100)] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | 
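For orientation, the retrieval tests above exercise the payload fields one at a time. A combined payload, assuming `retrieval_chunks` is the common.py helper these tests import, would look roughly like this (values illustrative):

```python
from common import retrieval_chunks  # assumed to be the helper used throughout this file


def test_combined_retrieval_payload(get_http_api_auth, add_chunks):
    dataset_id, _, _ = add_chunks
    payload = {
        "question": "chunk",
        "dataset_ids": [dataset_id],          # required; omitting it yields code 102
        "page_size": 10,
        "vector_similarity_weight": 0.2,
        "rerank_id": "BAAI/bge-reranker-v2-m3",
        "highlight": True,
    }
    res = retrieval_chunks(get_http_api_auth, payload)
    assert res["code"] == 0
```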
| from random import randint | from random import randint | ||||
| import pytest | import pytest | ||||
| from common import INVALID_API_TOKEN, delete_documnet, update_chunk | |||||
| from common import INVALID_API_TOKEN, delete_documnets, update_chunk | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| def test_update_chunk_to_deleted_document(self, get_http_api_auth, add_chunks): | def test_update_chunk_to_deleted_document(self, get_http_api_auth, add_chunks): | ||||
| dataset_id, document_id, chunk_ids = add_chunks | dataset_id, document_id, chunk_ids = add_chunks | ||||
| delete_documnet(get_http_api_auth, dataset_id, {"ids": [document_id]}) | |||||
| delete_documnets(get_http_api_auth, dataset_id, {"ids": [document_id]}) | |||||
| res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0]) | res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0]) | ||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == f"Can't find this chunk {chunk_ids[0]}" | assert res["message"] == f"Can't find this chunk {chunk_ids[0]}" | 
| import pytest | import pytest | ||||
| from common import batch_create_datasets, delete_dataset | |||||
| from common import batch_create_datasets, delete_datasets | |||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def get_dataset_ids(get_http_api_auth, request): | |||||
| def add_datasets(get_http_api_auth, request): | |||||
| def cleanup(): | def cleanup(): | ||||
| delete_dataset(get_http_api_auth) | |||||
| delete_datasets(get_http_api_auth) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| return batch_create_datasets(get_http_api_auth, 5) | return batch_create_datasets(get_http_api_auth, 5) | ||||
| @pytest.fixture(scope="function") | |||||
| def add_datasets_func(get_http_api_auth, request): | |||||
| def cleanup(): | |||||
| delete_datasets(get_http_api_auth) | |||||
| request.addfinalizer(cleanup) | |||||
| return batch_create_datasets(get_http_api_auth, 3) | 
| res = create_dataset(get_http_api_auth, payload) | res = create_dataset(get_http_api_auth, payload) | ||||
| assert res["code"] == 0, f"Failed to create dataset {i}" | assert res["code"] == 0, f"Failed to create dataset {i}" | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestAdvancedConfigurations: | |||||
| def test_avatar(self, get_http_api_auth, tmp_path): | def test_avatar(self, get_http_api_auth, tmp_path): | ||||
| fn = create_image_file(tmp_path / "ragflow_test.png") | fn = create_image_file(tmp_path / "ragflow_test.png") | ||||
| payload = { | payload = { | 
| from common import ( | from common import ( | ||||
| INVALID_API_TOKEN, | INVALID_API_TOKEN, | ||||
| batch_create_datasets, | batch_create_datasets, | ||||
| delete_dataset, | |||||
| list_dataset, | |||||
| delete_datasets, | |||||
| list_datasets, | |||||
| ) | ) | ||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestAuthorization: | class TestAuthorization: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "auth, expected_code, expected_message", | "auth, expected_code, expected_message", | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = delete_dataset(auth, {"ids": ids}) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = delete_datasets(auth) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| res = list_dataset(get_http_api_auth) | |||||
| assert len(res["data"]) == 1 | |||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestDatasetDeletion: | |||||
| class TestDatasetsDeletion: | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_message, remaining", | "payload, expected_code, expected_message, remaining", | ||||
| [ | [ | ||||
| (lambda r: {"ids": r}, 0, "", 0), | (lambda r: {"ids": r}, 0, "", 0), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_basic_scenarios(self, get_http_api_auth, payload, expected_code, expected_message, remaining): | |||||
| ids = batch_create_datasets(get_http_api_auth, 3) | |||||
| def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining): | |||||
| dataset_ids = add_datasets_func | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(ids) | |||||
| res = delete_dataset(get_http_api_auth, payload) | |||||
| payload = payload(dataset_ids) | |||||
| res = delete_datasets(get_http_api_auth, payload) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| res = list_dataset(get_http_api_auth) | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert len(res["data"]) == remaining | assert len(res["data"]) == remaining | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| lambda r: {"ids": r + ["invalid_id"]}, | lambda r: {"ids": r + ["invalid_id"]}, | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_delete_partial_invalid_id(self, get_http_api_auth, payload): | |||||
| ids = batch_create_datasets(get_http_api_auth, 3) | |||||
| def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload): | |||||
| dataset_ids = add_datasets_func | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(ids) | |||||
| res = delete_dataset(get_http_api_auth, payload) | |||||
| payload = payload(dataset_ids) | |||||
| res = delete_datasets(get_http_api_auth, payload) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"]["errors"][0] == "You don't own the dataset invalid_id" | assert res["data"]["errors"][0] == "You don't own the dataset invalid_id" | ||||
| assert res["data"]["success_count"] == 3 | assert res["data"]["success_count"] == 3 | ||||
| res = list_dataset(get_http_api_auth) | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert len(res["data"]) == 0 | assert len(res["data"]) == 0 | ||||
| def test_repeated_deletion(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = delete_dataset(get_http_api_auth, {"ids": ids}) | |||||
| def test_repeated_deletion(self, get_http_api_auth, add_datasets_func): | |||||
| dataset_ids = add_datasets_func | |||||
| res = delete_datasets(get_http_api_auth, {"ids": dataset_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = delete_dataset(get_http_api_auth, {"ids": ids}) | |||||
| res = delete_datasets(get_http_api_auth, {"ids": dataset_ids}) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == f"You don't own the dataset {ids[0]}" | |||||
| assert "You don't own the dataset" in res["message"] | |||||
| def test_duplicate_deletion(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = delete_dataset(get_http_api_auth, {"ids": ids + ids}) | |||||
| def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func): | |||||
| dataset_ids = add_datasets_func | |||||
| res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"]["errors"][0] == f"Duplicate dataset ids: {ids[0]}" | |||||
| assert res["data"]["success_count"] == 1 | |||||
| assert "Duplicate dataset ids" in res["data"]["errors"][0] | |||||
| assert res["data"]["success_count"] == 3 | |||||
| res = list_dataset(get_http_api_auth) | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert len(res["data"]) == 0 | assert len(res["data"]) == 0 | ||||
| def test_concurrent_deletion(self, get_http_api_auth): | def test_concurrent_deletion(self, get_http_api_auth): | ||||
| ids = batch_create_datasets(get_http_api_auth, 100) | ids = batch_create_datasets(get_http_api_auth, 100) | ||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [executor.submit(delete_dataset, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)] | |||||
| futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)] | |||||
| responses = [f.result() for f in futures] | responses = [f.result() for f in futures] | ||||
| assert all(r["code"] == 0 for r in responses) | assert all(r["code"] == 0 for r in responses) | ||||
| @pytest.mark.slow | @pytest.mark.slow | ||||
| def test_delete_10k(self, get_http_api_auth): | def test_delete_10k(self, get_http_api_auth): | ||||
| ids = batch_create_datasets(get_http_api_auth, 10_000) | ids = batch_create_datasets(get_http_api_auth, 10_000) | ||||
| res = delete_dataset(get_http_api_auth, {"ids": ids}) | |||||
| res = delete_datasets(get_http_api_auth, {"ids": ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_dataset(get_http_api_auth) | |||||
| res = list_datasets(get_http_api_auth) | |||||
| assert len(res["data"]) == 0 | assert len(res["data"]) == 0 | 
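The deletion tests parametrize the request body with callables such as `lambda r: {"ids": r}` because the dataset ids only exist once the fixture has run; each test resolves the callable against the fixture's ids before issuing the request. A stripped-down illustration of the pattern:

```python
import pytest

from common import delete_datasets


@pytest.mark.parametrize(
    "payload",
    [
        lambda r: {"ids": r[:1]},   # delete only the first dataset
        lambda r: {"ids": r},       # delete every dataset created by the fixture
    ],
)
def test_delete_with_late_bound_ids(get_http_api_auth, add_datasets_func, payload):
    dataset_ids = add_datasets_func
    if callable(payload):
        payload = payload(dataset_ids)
    res = delete_datasets(get_http_api_auth, payload)
    assert res["code"] == 0
```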
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import INVALID_API_TOKEN, list_dataset | |||||
| from common import INVALID_API_TOKEN, list_datasets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| return all(a >= b for a, b in zip(timestamps, timestamps[1:])) if descending else all(a <= b for a, b in zip(timestamps, timestamps[1:])) | return all(a >= b for a, b in zip(timestamps, timestamps[1:])) if descending else all(a <= b for a, b in zip(timestamps, timestamps[1:])) | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestAuthorization: | class TestAuthorization: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "auth, expected_code, expected_message", | "auth, expected_code, expected_message", | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | def test_invalid_auth(self, auth, expected_code, expected_message): | ||||
| res = list_dataset(auth) | |||||
| res = list_datasets(auth) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.usefixtures("get_dataset_ids") | |||||
| class TestDatasetList: | |||||
| @pytest.mark.usefixtures("add_datasets") | |||||
| class TestDatasetsList: | |||||
| def test_default(self, get_http_api_auth): | def test_default(self, get_http_api_auth): | ||||
| res = list_dataset(get_http_api_auth, params={}) | |||||
| res = list_datasets(get_http_api_auth, params={}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 5 | assert len(res["data"]) == 5 | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_page(self, get_http_api_auth, params, expected_code, expected_page_size, expected_message): | def test_page(self, get_http_api_auth, params, expected_code, expected_page_size, expected_message): | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| assert len(res["data"]) == expected_page_size | assert len(res["data"]) == expected_page_size | ||||
| expected_page_size, | expected_page_size, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| assert len(res["data"]) == expected_page_size | assert len(res["data"]) == expected_page_size | ||||
| assertions, | assertions, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if callable(assertions): | if callable(assertions): | ||||
| assertions, | assertions, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if callable(assertions): | if callable(assertions): | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_name(self, get_http_api_auth, params, expected_code, expected_num, expected_message): | def test_name(self, get_http_api_auth, params, expected_code, expected_num, expected_message): | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if params["name"] in [None, ""]: | if params["name"] in [None, ""]: | ||||
| def test_id( | def test_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_ids, | |||||
| add_datasets, | |||||
| dataset_id, | dataset_id, | ||||
| expected_code, | expected_code, | ||||
| expected_num, | expected_num, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_ids = get_dataset_ids | |||||
| dataset_ids = add_datasets | |||||
| if callable(dataset_id): | if callable(dataset_id): | ||||
| params = {"id": dataset_id(dataset_ids)} | params = {"id": dataset_id(dataset_ids)} | ||||
| else: | else: | ||||
| params = {"id": dataset_id} | params = {"id": dataset_id} | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if params["id"] in [None, ""]: | if params["id"] in [None, ""]: | ||||
| def test_name_and_id( | def test_name_and_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_ids, | |||||
| add_datasets, | |||||
| dataset_id, | dataset_id, | ||||
| name, | name, | ||||
| expected_code, | expected_code, | ||||
| expected_num, | expected_num, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_ids = get_dataset_ids | |||||
| dataset_ids = add_datasets | |||||
| if callable(dataset_id): | if callable(dataset_id): | ||||
| params = {"id": dataset_id(dataset_ids), "name": name} | params = {"id": dataset_id(dataset_ids), "name": name} | ||||
| else: | else: | ||||
| params = {"id": dataset_id, "name": name} | params = {"id": dataset_id, "name": name} | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| if expected_code == 0: | if expected_code == 0: | ||||
| assert len(res["data"]) == expected_num | assert len(res["data"]) == expected_num | ||||
| else: | else: | ||||
| def test_concurrent_list(self, get_http_api_auth): | def test_concurrent_list(self, get_http_api_auth): | ||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [executor.submit(list_dataset, get_http_api_auth) for i in range(100)] | |||||
| futures = [executor.submit(list_datasets, get_http_api_auth) for i in range(100)] | |||||
| responses = [f.result() for f in futures] | responses = [f.result() for f in futures] | ||||
| assert all(r["code"] == 0 for r in responses) | assert all(r["code"] == 0 for r in responses) | ||||
| def test_invalid_params(self, get_http_api_auth): | def test_invalid_params(self, get_http_api_auth): | ||||
| params = {"a": "b"} | params = {"a": "b"} | ||||
| res = list_dataset(get_http_api_auth, params=params) | |||||
| res = list_datasets(get_http_api_auth, params=params) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 5 | assert len(res["data"]) == 5 | 
| from common import ( | from common import ( | ||||
| DATASET_NAME_LIMIT, | DATASET_NAME_LIMIT, | ||||
| INVALID_API_TOKEN, | INVALID_API_TOKEN, | ||||
| batch_create_datasets, | |||||
| list_dataset, | |||||
| list_datasets, | |||||
| update_dataset, | update_dataset, | ||||
| ) | ) | ||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| # TODO: Missing scenario for updating embedding_model with chunk_count != 0 | # TODO: Missing scenario for updating embedding_model with chunk_count != 0 | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestAuthorization: | class TestAuthorization: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "auth, expected_code, expected_message", | "auth, expected_code, expected_message", | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = update_dataset(auth, ids[0], {"name": "new_name"}) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = update_dataset(auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestDatasetUpdate: | class TestDatasetUpdate: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "name, expected_code, expected_message", | "name, expected_code, expected_message", | ||||
| ("DATASET_1", 102, "Duplicated dataset name in updating dataset."), | ("DATASET_1", 102, "Duplicated dataset name in updating dataset."), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_name(self, get_http_api_auth, name, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 2) | |||||
| res = update_dataset(get_http_api_auth, ids[0], {"name": name}) | |||||
| def test_name(self, get_http_api_auth, add_datasets_func, name, expected_code, expected_message): | |||||
| dataset_ids = add_datasets_func | |||||
| res = update_dataset(get_http_api_auth, dataset_ids[0], {"name": name}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_ids[0]}) | |||||
| assert res["data"][0]["name"] == name | assert res["data"][0]["name"] == name | ||||
| else: | else: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| (None, 102, "`embedding_model` can't be empty"), | (None, 102, "`embedding_model` can't be empty"), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_embedding_model(self, get_http_api_auth, embedding_model, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = update_dataset(get_http_api_auth, ids[0], {"embedding_model": embedding_model}) | |||||
| def test_embedding_model(self, get_http_api_auth, add_dataset_func, embedding_model, expected_code, expected_message): | |||||
| dataset_id = add_dataset_func | |||||
| res = update_dataset(get_http_api_auth, dataset_id, {"embedding_model": embedding_model}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["embedding_model"] == embedding_model | assert res["data"][0]["embedding_model"] == embedding_model | ||||
| else: | else: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_chunk_method(self, get_http_api_auth, chunk_method, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = update_dataset(get_http_api_auth, ids[0], {"chunk_method": chunk_method}) | |||||
| def test_chunk_method(self, get_http_api_auth, add_dataset_func, chunk_method, expected_code, expected_message): | |||||
| dataset_id = add_dataset_func | |||||
| res = update_dataset(get_http_api_auth, dataset_id, {"chunk_method": chunk_method}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| if chunk_method != "": | if chunk_method != "": | ||||
| assert res["data"][0]["chunk_method"] == chunk_method | assert res["data"][0]["chunk_method"] == chunk_method | ||||
| else: | else: | ||||
| else: | else: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_avatar(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_avatar(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| fn = create_image_file(tmp_path / "ragflow_test.png") | fn = create_image_file(tmp_path / "ragflow_test.png") | ||||
| payload = {"avatar": encode_avatar(fn)} | payload = {"avatar": encode_avatar(fn)} | ||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| def test_description(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_description(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"description": "description"} | payload = {"description": "description"} | ||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["description"] == "description" | assert res["data"][0]["description"] == "description" | ||||
| def test_pagerank(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_pagerank(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"pagerank": 1} | payload = {"pagerank": 1} | ||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["pagerank"] == 1 | assert res["data"][0]["pagerank"] == 1 | ||||
| def test_similarity_threshold(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_similarity_threshold(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"similarity_threshold": 1} | payload = {"similarity_threshold": 1} | ||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["similarity_threshold"] == 1 | assert res["data"][0]["similarity_threshold"] == 1 | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| ("other_permission", 102), | ("other_permission", 102), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_permission(self, get_http_api_auth, permission, expected_code): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_permission(self, get_http_api_auth, add_dataset_func, permission, expected_code): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"permission": permission} | payload = {"permission": permission} | ||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| if expected_code == 0 and permission != "": | if expected_code == 0 and permission != "": | ||||
| assert res["data"][0]["permission"] == permission | assert res["data"][0]["permission"] == permission | ||||
| if permission == "": | if permission == "": | ||||
| assert res["data"][0]["permission"] == "me" | assert res["data"][0]["permission"] == "me" | ||||
| def test_vector_similarity_weight(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_vector_similarity_weight(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| payload = {"vector_similarity_weight": 1} | payload = {"vector_similarity_weight": 1} | ||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["vector_similarity_weight"] == 1 | assert res["data"][0]["vector_similarity_weight"] == 1 | ||||
| def test_invalid_dataset_id(self, get_http_api_auth): | def test_invalid_dataset_id(self, get_http_api_auth): | ||||
| batch_create_datasets(get_http_api_auth, 1) | |||||
| res = update_dataset(get_http_api_auth, "invalid_dataset_id", {"name": "invalid_dataset_id"}) | res = update_dataset(get_http_api_auth, "invalid_dataset_id", {"name": "invalid_dataset_id"}) | ||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == "You don't own the dataset" | assert res["message"] == "You don't own the dataset" | ||||
| {"update_time": 1741671443339}, | {"update_time": 1741671443339}, | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_modify_read_only_field(self, get_http_api_auth, payload): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = update_dataset(get_http_api_auth, ids[0], payload) | |||||
| def test_modify_read_only_field(self, get_http_api_auth, add_dataset_func, payload): | |||||
| dataset_id = add_dataset_func | |||||
| res = update_dataset(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 101 | assert res["code"] == 101 | ||||
| assert "is readonly" in res["message"] | assert "is readonly" in res["message"] | ||||
| def test_modify_unknown_field(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = update_dataset(get_http_api_auth, ids[0], {"unknown_field": 0}) | |||||
| def test_modify_unknown_field(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| res = update_dataset(get_http_api_auth, dataset_id, {"unknown_field": 0}) | |||||
| assert res["code"] == 100 | assert res["code"] == 100 | ||||
| def test_concurrent_update(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_concurrent_update(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [executor.submit(update_dataset, get_http_api_auth, ids[0], {"name": f"dataset_{i}"}) for i in range(100)] | |||||
| futures = [executor.submit(update_dataset, get_http_api_auth, dataset_id, {"name": f"dataset_{i}"}) for i in range(100)] | |||||
| responses = [f.result() for f in futures] | responses = [f.result() for f in futures] | ||||
| assert all(r["code"] == 0 for r in responses) | assert all(r["code"] == 0 for r in responses) | 
| import pytest | import pytest | ||||
| from common import batch_create_datasets, bulk_upload_documents, delete_dataset | |||||
| from common import bulk_upload_documents, delete_documnets | |||||
| @pytest.fixture(scope="class") | |||||
| def file_management_tmp_dir(tmp_path_factory): | |||||
| return tmp_path_factory.mktemp("file_management") | |||||
| @pytest.fixture(scope="function") | |||||
| def add_document_func(request, get_http_api_auth, add_dataset, ragflow_tmp_dir): | |||||
| dataset_id = add_dataset | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, ragflow_tmp_dir) | |||||
| def cleanup(): | |||||
| delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids}) | |||||
| request.addfinalizer(cleanup) | |||||
| return dataset_id, document_ids[0] | |||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def get_dataset_id_and_document_ids(get_http_api_auth, file_management_tmp_dir, request): | |||||
| def add_documents(request, get_http_api_auth, add_dataset, ragflow_tmp_dir): | |||||
| dataset_id = add_dataset | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 5, ragflow_tmp_dir) | |||||
| def cleanup(): | def cleanup(): | ||||
| delete_dataset(get_http_api_auth) | |||||
| delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids}) | |||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| return dataset_id, document_ids | |||||
| @pytest.fixture(scope="function") | |||||
| def add_documents_func(get_http_api_auth, add_dataset_func, ragflow_tmp_dir): | |||||
| dataset_id = add_dataset_func | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, ragflow_tmp_dir) | |||||
| dataset_ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = dataset_ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 5, file_management_tmp_dir) | |||||
| return dataset_id, document_ids | return dataset_id, document_ids | 
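The document fixtures above build on dataset fixtures (`add_dataset`, `add_dataset_func`) and a shared `ragflow_tmp_dir` that live in a parent conftest outside this diff. Assuming those names and scopes from the way they are used here, a minimal sketch of that parent conftest could look like:

```python
# Hypothetical parent conftest.py -- fixture names and scopes are inferred from usage
# in this diff, not taken from the actual file.
import pytest
from common import batch_create_datasets, delete_dataset


@pytest.fixture(scope="session")
def ragflow_tmp_dir(tmp_path_factory):
    # Shared directory for generated upload files across the test session.
    return tmp_path_factory.mktemp("ragflow_http_api")


@pytest.fixture(scope="class")
def add_dataset(request, get_http_api_auth):
    # One dataset shared by all tests in a class that only read from it.
    request.addfinalizer(lambda: delete_dataset(get_http_api_auth))
    return batch_create_datasets(get_http_api_auth, 1)[0]


@pytest.fixture(scope="function")
def add_dataset_func(request, get_http_api_auth):
    # A fresh dataset per test for cases that mutate or delete it.
    request.addfinalizer(lambda: delete_dataset(get_http_api_auth))
    return batch_create_datasets(get_http_api_auth, 1)[0]
```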
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import ( | |||||
| INVALID_API_TOKEN, | |||||
| batch_create_datasets, | |||||
| bulk_upload_documents, | |||||
| delete_documnet, | |||||
| list_documnet, | |||||
| ) | |||||
| from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documnets, list_documnets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_dataset_id_and_document_ids, auth, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| res = delete_documnet(auth, dataset_id, {"ids": document_ids}) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = delete_documnets(auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestDocumentDeletion: | |||||
| class TestDocumentsDeletion: | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_message, remaining", | "payload, expected_code, expected_message, remaining", | ||||
| [ | [ | ||||
| def test_basic_scenarios( | def test_basic_scenarios( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| tmp_path, | |||||
| add_documents_func, | |||||
| payload, | payload, | ||||
| expected_code, | expected_code, | ||||
| expected_message, | expected_message, | ||||
| remaining, | remaining, | ||||
| ): | ): | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(document_ids) | payload = payload(document_ids) | ||||
| res = delete_documnet(get_http_api_auth, ids[0], payload) | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if res["code"] != 0: | if res["code"] != 0: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| res = list_documnet(get_http_api_auth, ids[0]) | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert len(res["data"]["docs"]) == remaining | assert len(res["data"]["docs"]) == remaining | ||||
| assert res["data"]["total"] == remaining | assert res["data"]["total"] == remaining | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_dataset_id(self, get_http_api_auth, tmp_path, dataset_id, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) | |||||
| res = delete_documnet(get_http_api_auth, dataset_id, {"ids": document_ids[:1]}) | |||||
| def test_invalid_dataset_id(self, get_http_api_auth, add_documents_func, dataset_id, expected_code, expected_message): | |||||
| _, document_ids = add_documents_func | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids[:1]}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| lambda r: {"ids": r + ["invalid_id"]}, | lambda r: {"ids": r + ["invalid_id"]}, | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_delete_partial_invalid_id(self, get_http_api_auth, tmp_path, payload): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) | |||||
| def test_delete_partial_invalid_id(self, get_http_api_auth, add_documents_func, payload): | |||||
| dataset_id, document_ids = add_documents_func | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(document_ids) | payload = payload(document_ids) | ||||
| res = delete_documnet(get_http_api_auth, ids[0], payload) | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == "Documents not found: ['invalid_id']" | assert res["message"] == "Documents not found: ['invalid_id']" | ||||
| res = list_documnet(get_http_api_auth, ids[0]) | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert len(res["data"]["docs"]) == 0 | assert len(res["data"]["docs"]) == 0 | ||||
| assert res["data"]["total"] == 0 | assert res["data"]["total"] == 0 | ||||
| def test_repeated_deletion(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 1, tmp_path) | |||||
| res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids}) | |||||
| def test_repeated_deletion(self, get_http_api_auth, add_documents_func): | |||||
| dataset_id, document_ids = add_documents_func | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids}) | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids}) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == f"Documents not found: {document_ids}" | |||||
| assert "Documents not found" in res["message"] | |||||
| def test_duplicate_deletion(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 1, tmp_path) | |||||
| res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids + document_ids}) | |||||
| def test_duplicate_deletion(self, get_http_api_auth, add_documents_func): | |||||
| dataset_id, document_ids = add_documents_func | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids + document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"]["errors"][0] == f"Duplicate document ids: {document_ids[0]}" | |||||
| assert res["data"]["success_count"] == 1 | |||||
| assert "Duplicate document ids" in res["data"]["errors"][0] | |||||
| assert res["data"]["success_count"] == 3 | |||||
| res = list_documnet(get_http_api_auth, ids[0]) | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert len(res["data"]["docs"]) == 0 | assert len(res["data"]["docs"]) == 0 | ||||
| assert res["data"]["total"] == 0 | assert res["data"]["total"] == 0 | ||||
| def test_concurrent_deletion(self, get_http_api_auth, tmp_path): | |||||
| documnets_num = 100 | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], documnets_num, tmp_path) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [ | |||||
| executor.submit( | |||||
| delete_documnet, | |||||
| get_http_api_auth, | |||||
| ids[0], | |||||
| {"ids": document_ids[i : i + 1]}, | |||||
| ) | |||||
| for i in range(documnets_num) | |||||
| ] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| @pytest.mark.slow | |||||
| def test_delete_1k(self, get_http_api_auth, tmp_path): | |||||
| documnets_num = 1_000 | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], documnets_num, tmp_path) | |||||
| res = list_documnet(get_http_api_auth, ids[0]) | |||||
| assert res["data"]["total"] == documnets_num | |||||
| res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids}) | |||||
| assert res["code"] == 0 | |||||
| res = list_documnet(get_http_api_auth, ids[0]) | |||||
| assert res["data"]["total"] == 0 | |||||
| def test_concurrent_deletion(get_http_api_auth, add_dataset, tmp_path): | |||||
| documnets_num = 100 | |||||
| dataset_id = add_dataset | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, documnets_num, tmp_path) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [ | |||||
| executor.submit( | |||||
| delete_documnets, | |||||
| get_http_api_auth, | |||||
| dataset_id, | |||||
| {"ids": document_ids[i : i + 1]}, | |||||
| ) | |||||
| for i in range(documnets_num) | |||||
| ] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| @pytest.mark.slow | |||||
| def test_delete_1k(get_http_api_auth, add_dataset, tmp_path): | |||||
| documnets_num = 1_000 | |||||
| dataset_id = add_dataset | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, documnets_num, tmp_path) | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert res["data"]["total"] == documnets_num | |||||
| res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids}) | |||||
| assert res["code"] == 0 | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert res["data"]["total"] == 0 | 
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import INVALID_API_TOKEN, batch_create_datasets, bulk_upload_documents, download_document, upload_documnets | |||||
| from common import INVALID_API_TOKEN, bulk_upload_documents, download_document, upload_documnets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| from libs.utils import compare_by_hash | from libs.utils import compare_by_hash | ||||
| from requests import codes | from requests import codes | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_dataset_id_and_document_ids, tmp_path, auth, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| res = download_document(auth, dataset_id, document_ids[0], tmp_path / "ragflow_tes.txt") | |||||
| def test_invalid_auth(self, tmp_path, auth, expected_code, expected_message): | |||||
| res = download_document(auth, "dataset_id", "document_id", tmp_path / "ragflow_tes.txt") | |||||
| assert res.status_code == codes.ok | assert res.status_code == codes.ok | ||||
| with (tmp_path / "ragflow_tes.txt").open("r") as f: | with (tmp_path / "ragflow_tes.txt").open("r") as f: | ||||
| response_json = json.load(f) | response_json = json.load(f) | ||||
| assert response_json["message"] == expected_message | assert response_json["message"] == expected_message | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "generate_test_files", | "generate_test_files", | ||||
| [ | [ | ||||
| ], | ], | ||||
| indirect=True, | indirect=True, | ||||
| ) | ) | ||||
| def test_file_type_validation(get_http_api_auth, generate_test_files, request): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_file_type_validation(get_http_api_auth, add_dataset, generate_test_files, request): | |||||
| dataset_id = add_dataset | |||||
| fp = generate_test_files[request.node.callspec.params["generate_test_files"]] | fp = generate_test_files[request.node.callspec.params["generate_test_files"]] | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| document_id = res["data"][0]["id"] | document_id = res["data"][0]["id"] | ||||
| res = download_document( | res = download_document( | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| ids[0], | |||||
| dataset_id, | |||||
| document_id, | document_id, | ||||
| fp.with_stem("ragflow_test_download"), | fp.with_stem("ragflow_test_download"), | ||||
| ) | ) | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_document_id(self, get_http_api_auth, get_dataset_id_and_document_ids, tmp_path, document_id, expected_code, expected_message): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| def test_invalid_document_id(self, get_http_api_auth, add_documents, tmp_path, document_id, expected_code, expected_message): | |||||
| dataset_id, _ = add_documents | |||||
| res = download_document( | res = download_document( | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| dataset_id, | dataset_id, | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_dataset_id(self, get_http_api_auth, get_dataset_id_and_document_ids, tmp_path, dataset_id, expected_code, expected_message): | |||||
| _, document_ids = get_dataset_id_and_document_ids | |||||
| def test_invalid_dataset_id(self, get_http_api_auth, add_documents, tmp_path, dataset_id, expected_code, expected_message): | |||||
| _, document_ids = add_documents | |||||
| res = download_document( | res = download_document( | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| dataset_id, | dataset_id, | ||||
| assert response_json["code"] == expected_code | assert response_json["code"] == expected_code | ||||
| assert response_json["message"] == expected_message | assert response_json["message"] == expected_message | ||||
| def test_same_file_repeat(self, get_http_api_auth, get_dataset_id_and_document_ids, tmp_path, file_management_tmp_dir): | |||||
| def test_same_file_repeat(self, get_http_api_auth, add_documents, tmp_path, ragflow_tmp_dir): | |||||
| num = 5 | num = 5 | ||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| dataset_id, document_ids = add_documents | |||||
| for i in range(num): | for i in range(num): | ||||
| res = download_document( | res = download_document( | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| ) | ) | ||||
| assert res.status_code == codes.ok | assert res.status_code == codes.ok | ||||
| assert compare_by_hash( | assert compare_by_hash( | ||||
| file_management_tmp_dir / "ragflow_test_upload_0.txt", | |||||
| ragflow_tmp_dir / "ragflow_test_upload_0.txt", | |||||
| tmp_path / f"ragflow_test_download_{i}.txt", | tmp_path / f"ragflow_test_download_{i}.txt", | ||||
| ) | ) | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| def test_concurrent_download(get_http_api_auth, tmp_path): | |||||
| def test_concurrent_download(get_http_api_auth, add_dataset, tmp_path): | |||||
| document_count = 20 | document_count = 20 | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], document_count, tmp_path) | |||||
| dataset_id = add_dataset | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_count, tmp_path) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [ | futures = [ | ||||
| executor.submit( | executor.submit( | ||||
| download_document, | download_document, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| ids[0], | |||||
| dataset_id, | |||||
| document_ids[i], | document_ids[i], | ||||
| tmp_path / f"ragflow_test_download_{i}.txt", | tmp_path / f"ragflow_test_download_{i}.txt", | ||||
| ) | ) | 
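The download tests verify round-trips with `compare_by_hash` from `libs.utils`, which is not shown in this diff. A plausible sketch, assuming it simply compares content digests of the two files:

```python
# Hypothetical sketch of libs/utils.py::compare_by_hash -- the real helper may differ.
import hashlib
from pathlib import Path


def compare_by_hash(path_a, path_b, chunk_size=1 << 20):
    def digest(path):
        h = hashlib.sha256()
        with Path(path).open("rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    return digest(path_a) == digest(path_b)
```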
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import ( | |||||
| INVALID_API_TOKEN, | |||||
| list_documnet, | |||||
| ) | |||||
| from common import INVALID_API_TOKEN, list_documnets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_dataset_id_and_document_ids, auth, expected_code, expected_message): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(auth, dataset_id) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = list_documnets(auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| class TestDocumentList: | |||||
| def test_default(self, get_http_api_auth, get_dataset_id_and_document_ids): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id) | |||||
| class TestDocumentsList: | |||||
| def test_default(self, get_http_api_auth, add_documents): | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]["docs"]) == 5 | assert len(res["data"]["docs"]) == 5 | ||||
| assert res["data"]["total"] == 5 | assert res["data"]["total"] == 5 | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_dataset_id(self, get_http_api_auth, get_dataset_id_and_document_ids, dataset_id, expected_code, expected_message): | |||||
| res = list_documnet(get_http_api_auth, dataset_id) | |||||
| def test_invalid_dataset_id(self, get_http_api_auth, dataset_id, expected_code, expected_message): | |||||
| res = list_documnets(get_http_api_auth, dataset_id) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_page( | def test_page( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| params, | params, | ||||
| expected_code, | expected_code, | ||||
| expected_page_size, | expected_page_size, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| assert len(res["data"]["docs"]) == expected_page_size | assert len(res["data"]["docs"]) == expected_page_size | ||||
| def test_page_size( | def test_page_size( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| params, | params, | ||||
| expected_code, | expected_code, | ||||
| expected_page_size, | expected_page_size, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| assert len(res["data"]["docs"]) == expected_page_size | assert len(res["data"]["docs"]) == expected_page_size | ||||
| def test_orderby( | def test_orderby( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| params, | params, | ||||
| expected_code, | expected_code, | ||||
| assertions, | assertions, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if callable(assertions): | if callable(assertions): | ||||
| def test_desc( | def test_desc( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| params, | params, | ||||
| expected_code, | expected_code, | ||||
| assertions, | assertions, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if callable(assertions): | if callable(assertions): | ||||
| ({"keywords": "unknown"}, 0), | ({"keywords": "unknown"}, 0), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_keywords(self, get_http_api_auth, get_dataset_id_and_document_ids, params, expected_num): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| def test_keywords(self, get_http_api_auth, add_documents, params, expected_num): | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]["docs"]) == expected_num | assert len(res["data"]["docs"]) == expected_num | ||||
| assert res["data"]["total"] == expected_num | assert res["data"]["total"] == expected_num | ||||
| def test_name( | def test_name( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| params, | params, | ||||
| expected_code, | expected_code, | ||||
| expected_num, | expected_num, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| dataset_id, _ = add_documents | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| if params["name"] in [None, ""]: | if params["name"] in [None, ""]: | ||||
| def test_id( | def test_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| document_id, | document_id, | ||||
| expected_code, | expected_code, | ||||
| expected_num, | expected_num, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| dataset_id, document_ids = add_documents | |||||
| if callable(document_id): | if callable(document_id): | ||||
| params = {"id": document_id(document_ids)} | params = {"id": document_id(document_ids)} | ||||
| else: | else: | ||||
| params = {"id": document_id} | params = {"id": document_id} | ||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| def test_name_and_id( | def test_name_and_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| document_id, | document_id, | ||||
| name, | name, | ||||
| expected_code, | expected_code, | ||||
| expected_num, | expected_num, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| dataset_id, document_ids = add_documents | |||||
| if callable(document_id): | if callable(document_id): | ||||
| params = {"id": document_id(document_ids), "name": name} | params = {"id": document_id(document_ids), "name": name} | ||||
| else: | else: | ||||
| params = {"id": document_id, "name": name} | params = {"id": document_id, "name": name} | ||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| if expected_code == 0: | if expected_code == 0: | ||||
| assert len(res["data"]["docs"]) == expected_num | assert len(res["data"]["docs"]) == expected_num | ||||
| else: | else: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_concurrent_list(self, get_http_api_auth, get_dataset_id_and_document_ids): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| def test_concurrent_list(self, get_http_api_auth, add_documents): | |||||
| dataset_id, _ = add_documents | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [executor.submit(list_documnet, get_http_api_auth, dataset_id) for i in range(100)] | |||||
| futures = [executor.submit(list_documnets, get_http_api_auth, dataset_id) for i in range(100)] | |||||
| responses = [f.result() for f in futures] | responses = [f.result() for f in futures] | ||||
| assert all(r["code"] == 0 for r in responses) | assert all(r["code"] == 0 for r in responses) | ||||
| def test_invalid_params(self, get_http_api_auth, get_dataset_id_and_document_ids): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| def test_invalid_params(self, get_http_api_auth, add_documents): | |||||
| dataset_id, _ = add_documents | |||||
| params = {"a": "b"} | params = {"a": "b"} | ||||
| res = list_documnet(get_http_api_auth, dataset_id, params=params) | |||||
| res = list_documnets(get_http_api_auth, dataset_id, params=params) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]["docs"]) == 5 | assert len(res["data"]["docs"]) == 5 | 
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import ( | |||||
| INVALID_API_TOKEN, | |||||
| batch_create_datasets, | |||||
| bulk_upload_documents, | |||||
| list_documnet, | |||||
| parse_documnet, | |||||
| ) | |||||
| from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| from libs.utils import wait_for | from libs.utils import wait_for | ||||
| def validate_document_details(auth, dataset_id, document_ids): | def validate_document_details(auth, dataset_id, document_ids): | ||||
| for document_id in document_ids: | for document_id in document_ids: | ||||
| res = list_documnet(auth, dataset_id, params={"id": document_id}) | |||||
| res = list_documnets(auth, dataset_id, params={"id": document_id}) | |||||
| doc = res["data"]["docs"][0] | doc = res["data"]["docs"][0] | ||||
| assert doc["run"] == "DONE" | assert doc["run"] == "DONE" | ||||
| assert len(doc["process_begin_at"]) > 0 | assert len(doc["process_begin_at"]) > 0 | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_dataset_id_and_document_ids, auth, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| res = parse_documnet(auth, dataset_id, {"document_ids": document_ids}) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = parse_documnets(auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestDocumentsParse: | class TestDocumentsParse: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_message", | "payload, expected_code, expected_message", | ||||
| (lambda r: {"document_ids": r}, 0, ""), | (lambda r: {"document_ids": r}, 0, ""), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_basic_scenarios(self, get_http_api_auth, tmp_path, payload, expected_code, expected_message): | |||||
| def test_basic_scenarios(self, get_http_api_auth, add_documents_func, payload, expected_code, expected_message): | |||||
| @wait_for(10, 1, "Document parsing timeout") | @wait_for(10, 1, "Document parsing timeout") | ||||
| def condition(_auth, _dataset_id, _document_ids): | def condition(_auth, _dataset_id, _document_ids): | ||||
| for _document_id in _document_ids: | for _document_id in _document_ids: | ||||
| res = list_documnet(_auth, _dataset_id, {"id": _document_id}) | |||||
| res = list_documnets(_auth, _dataset_id, {"id": _document_id}) | |||||
| if res["data"]["docs"][0]["run"] != "DONE": | if res["data"]["docs"][0]["run"] != "DONE": | ||||
| return False | return False | ||||
| return True | return True | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(document_ids) | payload = payload(document_ids) | ||||
| res = parse_documnet(get_http_api_auth, dataset_id, payload) | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code != 0: | if expected_code != 0: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_invalid_dataset_id( | def test_invalid_dataset_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| tmp_path, | |||||
| add_documents_func, | |||||
| dataset_id, | dataset_id, | ||||
| expected_code, | expected_code, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 1, tmp_path) | |||||
| res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| _, document_ids = add_documents_func | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| lambda r: {"document_ids": r + ["invalid_id"]}, | lambda r: {"document_ids": r + ["invalid_id"]}, | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_parse_partial_invalid_document_id(self, get_http_api_auth, tmp_path, payload): | |||||
| def test_parse_partial_invalid_document_id(self, get_http_api_auth, add_documents_func, payload): | |||||
| @wait_for(10, 1, "Document parsing timeout") | @wait_for(10, 1, "Document parsing timeout") | ||||
| def condition(_auth, _dataset_id): | def condition(_auth, _dataset_id): | ||||
| res = list_documnet(_auth, _dataset_id) | |||||
| res = list_documnets(_auth, _dataset_id) | |||||
| for doc in res["data"]["docs"]: | for doc in res["data"]["docs"]: | ||||
| if doc["run"] != "DONE": | if doc["run"] != "DONE": | ||||
| return False | return False | ||||
| return True | return True | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(document_ids) | payload = payload(document_ids) | ||||
| res = parse_documnet(get_http_api_auth, dataset_id, payload) | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == "Documents not found: ['invalid_id']" | assert res["message"] == "Documents not found: ['invalid_id']" | ||||
| validate_document_details(get_http_api_auth, dataset_id, document_ids) | validate_document_details(get_http_api_auth, dataset_id, document_ids) | ||||
| def test_repeated_parse(self, get_http_api_auth, tmp_path): | |||||
| def test_repeated_parse(self, get_http_api_auth, add_documents_func): | |||||
| @wait_for(10, 1, "Document parsing timeout") | @wait_for(10, 1, "Document parsing timeout") | ||||
| def condition(_auth, _dataset_id): | def condition(_auth, _dataset_id): | ||||
| res = list_documnet(_auth, _dataset_id) | |||||
| res = list_documnets(_auth, _dataset_id) | |||||
| for doc in res["data"]["docs"]: | for doc in res["data"]["docs"]: | ||||
| if doc["run"] != "DONE": | if doc["run"] != "DONE": | ||||
| return False | return False | ||||
| return True | return True | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path) | |||||
| res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| condition(get_http_api_auth, dataset_id) | condition(get_http_api_auth, dataset_id) | ||||
| res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| def test_duplicate_parse(self, get_http_api_auth, tmp_path): | |||||
| def test_duplicate_parse(self, get_http_api_auth, add_documents_func): | |||||
| @wait_for(10, 1, "Document parsing timeout") | @wait_for(10, 1, "Document parsing timeout") | ||||
| def condition(_auth, _dataset_id): | def condition(_auth, _dataset_id): | ||||
| res = list_documnet(_auth, _dataset_id) | |||||
| res = list_documnets(_auth, _dataset_id) | |||||
| for doc in res["data"]["docs"]: | for doc in res["data"]["docs"]: | ||||
| if doc["run"] != "DONE": | if doc["run"] != "DONE": | ||||
| return False | return False | ||||
| return True | return True | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path) | |||||
| res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"]["errors"][0] == f"Duplicate document ids: {document_ids[0]}" | |||||
| assert res["data"]["success_count"] == 1 | |||||
| assert "Duplicate document ids" in res["data"]["errors"][0] | |||||
| assert res["data"]["success_count"] == 3 | |||||
| condition(get_http_api_auth, dataset_id) | condition(get_http_api_auth, dataset_id) | ||||
| validate_document_details(get_http_api_auth, dataset_id, document_ids) | validate_document_details(get_http_api_auth, dataset_id, document_ids) | ||||
| @pytest.mark.slow | |||||
| def test_parse_100_files(self, get_http_api_auth, tmp_path): | |||||
| @wait_for(100, 1, "Document parsing timeout") | |||||
| def condition(_auth, _dataset_id, _document_num): | |||||
| res = list_documnet(_auth, _dataset_id, {"page_size": _document_num}) | |||||
| for doc in res["data"]["docs"]: | |||||
| if doc["run"] != "DONE": | |||||
| return False | |||||
| return True | |||||
| document_num = 100 | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | |||||
| condition(get_http_api_auth, dataset_id, document_num) | |||||
| validate_document_details(get_http_api_auth, dataset_id, document_ids) | |||||
| @pytest.mark.slow | |||||
| def test_concurrent_parse(self, get_http_api_auth, tmp_path): | |||||
| @wait_for(120, 1, "Document parsing timeout") | |||||
| def condition(_auth, _dataset_id, _document_num): | |||||
| res = list_documnet(_auth, _dataset_id, {"page_size": _document_num}) | |||||
| for doc in res["data"]["docs"]: | |||||
| if doc["run"] != "DONE": | |||||
| return False | |||||
| return True | |||||
| document_num = 100 | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [ | |||||
| executor.submit( | |||||
| parse_documnet, | |||||
| get_http_api_auth, | |||||
| dataset_id, | |||||
| {"document_ids": document_ids[i : i + 1]}, | |||||
| ) | |||||
| for i in range(document_num) | |||||
| ] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| condition(get_http_api_auth, dataset_id, document_num) | |||||
| validate_document_details(get_http_api_auth, dataset_id, document_ids) | |||||
| @pytest.mark.slow | |||||
| def test_parse_100_files(get_http_api_auth, add_dataset_func, tmp_path): | |||||
| @wait_for(100, 1, "Document parsing timeout") | |||||
| def condition(_auth, _dataset_id, _document_num): | |||||
| res = list_documnets(_auth, _dataset_id, {"page_size": _document_num}) | |||||
| for doc in res["data"]["docs"]: | |||||
| if doc["run"] != "DONE": | |||||
| return False | |||||
| return True | |||||
| document_num = 100 | |||||
| dataset_id = add_dataset_func | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | |||||
| condition(get_http_api_auth, dataset_id, document_num) | |||||
| validate_document_details(get_http_api_auth, dataset_id, document_ids) | |||||
| @pytest.mark.slow | |||||
| def test_concurrent_parse(get_http_api_auth, add_dataset_func, tmp_path): | |||||
| @wait_for(120, 1, "Document parsing timeout") | |||||
| def condition(_auth, _dataset_id, _document_num): | |||||
| res = list_documnets(_auth, _dataset_id, {"page_size": _document_num}) | |||||
| for doc in res["data"]["docs"]: | |||||
| if doc["run"] != "DONE": | |||||
| return False | |||||
| return True | |||||
| document_num = 100 | |||||
| dataset_id = add_dataset_func | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [ | |||||
| executor.submit( | |||||
| parse_documnets, | |||||
| get_http_api_auth, | |||||
| dataset_id, | |||||
| {"document_ids": document_ids[i : i + 1]}, | |||||
| ) | |||||
| for i in range(document_num) | |||||
| ] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| condition(get_http_api_auth, dataset_id, document_num) | |||||
| validate_document_details(get_http_api_auth, dataset_id, document_ids) | 
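The parse tests poll document status through the `wait_for` decorator from `libs.utils`, used as `@wait_for(10, 1, "Document parsing timeout")` around a predicate that returns `True` once every document reaches `DONE`. The decorator itself is outside this diff; a minimal sketch of the polling behaviour it appears to provide (an assumption, not the actual code):

```python
# Hypothetical sketch of libs/utils.py::wait_for -- retries the wrapped predicate until
# it returns a truthy value or the timeout expires. The real helper may differ.
import functools
import time


def wait_for(timeout, interval, error_message="condition not met"):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                if func(*args, **kwargs):
                    return True
                time.sleep(interval)
            raise TimeoutError(error_message)

        return wrapper

    return decorator
```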
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import pytest | import pytest | ||||
| from common import ( | |||||
| INVALID_API_TOKEN, | |||||
| batch_create_datasets, | |||||
| bulk_upload_documents, | |||||
| list_documnet, | |||||
| parse_documnet, | |||||
| stop_parse_documnet, | |||||
| ) | |||||
| from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets, stop_parse_documnets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| from libs.utils import wait_for | from libs.utils import wait_for | ||||
| def validate_document_parse_done(auth, dataset_id, document_ids): | def validate_document_parse_done(auth, dataset_id, document_ids): | ||||
| for document_id in document_ids: | for document_id in document_ids: | ||||
| res = list_documnet(auth, dataset_id, params={"id": document_id}) | |||||
| res = list_documnets(auth, dataset_id, params={"id": document_id}) | |||||
| doc = res["data"]["docs"][0] | doc = res["data"]["docs"][0] | ||||
| assert doc["run"] == "DONE" | assert doc["run"] == "DONE" | ||||
| assert len(doc["process_begin_at"]) > 0 | assert len(doc["process_begin_at"]) > 0 | ||||
| def validate_document_parse_cancel(auth, dataset_id, document_ids): | def validate_document_parse_cancel(auth, dataset_id, document_ids): | ||||
| for document_id in document_ids: | for document_id in document_ids: | ||||
| res = list_documnet(auth, dataset_id, params={"id": document_id}) | |||||
| res = list_documnets(auth, dataset_id, params={"id": document_id}) | |||||
| doc = res["data"]["docs"][0] | doc = res["data"]["docs"][0] | ||||
| assert doc["run"] == "CANCEL" | assert doc["run"] == "CANCEL" | ||||
| assert len(doc["process_begin_at"]) > 0 | assert len(doc["process_begin_at"]) > 0 | ||||
| assert doc["progress"] == 0.0 | assert doc["progress"] == 0.0 | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestAuthorization: | class TestAuthorization: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "auth, expected_code, expected_message", | "auth, expected_code, expected_message", | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = stop_parse_documnet(auth, ids[0]) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = stop_parse_documnets(auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.skip | @pytest.mark.skip | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestDocumentsParseStop: | class TestDocumentsParseStop: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "payload, expected_code, expected_message", | "payload, expected_code, expected_message", | ||||
| (lambda r: {"document_ids": r}, 0, ""), | (lambda r: {"document_ids": r}, 0, ""), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_basic_scenarios(self, get_http_api_auth, tmp_path, payload, expected_code, expected_message): | |||||
| def test_basic_scenarios(self, get_http_api_auth, add_documents_func, payload, expected_code, expected_message): | |||||
| @wait_for(10, 1, "Document parsing timeout") | @wait_for(10, 1, "Document parsing timeout") | ||||
| def condition(_auth, _dataset_id, _document_ids): | def condition(_auth, _dataset_id, _document_ids): | ||||
| for _document_id in _document_ids: | for _document_id in _document_ids: | ||||
| res = list_documnet(_auth, _dataset_id, {"id": _document_id}) | |||||
| res = list_documnets(_auth, _dataset_id, {"id": _document_id}) | |||||
| if res["data"]["docs"][0]["run"] != "DONE": | if res["data"]["docs"][0]["run"] != "DONE": | ||||
| return False | return False | ||||
| return True | return True | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(document_ids) | payload = payload(document_ids) | ||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, payload) | |||||
| res = stop_parse_documnets(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code != 0: | if expected_code != 0: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| validate_document_parse_done(get_http_api_auth, dataset_id, completed_document_ids) | validate_document_parse_done(get_http_api_auth, dataset_id, completed_document_ids) | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "dataset_id, expected_code, expected_message", | |||||
| "invalid_dataset_id, expected_code, expected_message", | |||||
| [ | [ | ||||
| ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"), | ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"), | ||||
| ( | ( | ||||
| def test_invalid_dataset_id( | def test_invalid_dataset_id( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| tmp_path, | |||||
| dataset_id, | |||||
| add_documents_func, | |||||
| invalid_dataset_id, | |||||
| expected_code, | expected_code, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 1, tmp_path) | |||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| dataset_id, document_ids = add_documents_func | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnets(get_http_api_auth, invalid_dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| lambda r: {"document_ids": r + ["invalid_id"]}, | lambda r: {"document_ids": r + ["invalid_id"]}, | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_stop_parse_partial_invalid_document_id(self, get_http_api_auth, tmp_path, payload): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| def test_stop_parse_partial_invalid_document_id(self, get_http_api_auth, add_documents_func, payload): | |||||
| dataset_id, document_ids = add_documents_func | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| if callable(payload): | if callable(payload): | ||||
| payload = payload(document_ids) | payload = payload(document_ids) | ||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, payload) | |||||
| res = stop_parse_documnets(get_http_api_auth, dataset_id, payload) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == "You don't own the document invalid_id." | assert res["message"] == "You don't own the document invalid_id." | ||||
| validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids) | validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids) | ||||
| def test_repeated_stop_parse(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| def test_repeated_stop_parse(self, get_http_api_auth, add_documents_func): | |||||
| dataset_id, document_ids = add_documents_func | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 102 | assert res["code"] == 102 | ||||
| assert res["message"] == "Can't stop parsing document with progress at 0 or 1" | assert res["message"] == "Can't stop parsing document with progress at 0 or 1" | ||||
| def test_duplicate_stop_parse(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}) | |||||
| def test_duplicate_stop_parse(self, get_http_api_auth, add_documents_func): | |||||
| dataset_id, document_ids = add_documents_func | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"]["success_count"] == 1 | |||||
| assert res["data"]["success_count"] == 3 | |||||
| assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"] | assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"] | ||||
| @pytest.mark.slow | |||||
| def test_stop_parse_100_files(self, get_http_api_auth, tmp_path): | |||||
| document_num = 100 | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | |||||
| validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids) | |||||
| @pytest.mark.slow | |||||
| def test_concurrent_parse(self, get_http_api_auth, tmp_path): | |||||
| document_num = 50 | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| dataset_id = ids[0] | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [ | |||||
| executor.submit( | |||||
| stop_parse_documnet, | |||||
| get_http_api_auth, | |||||
| dataset_id, | |||||
| {"document_ids": document_ids[i : i + 1]}, | |||||
| ) | |||||
| for i in range(document_num) | |||||
| ] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids) | |||||
| @pytest.mark.slow | |||||
| def test_stop_parse_100_files(get_http_api_auth, add_dataset_func, tmp_path): | |||||
| document_num = 100 | |||||
| dataset_id = add_dataset_func | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| assert res["code"] == 0 | |||||
| validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids) | |||||
| @pytest.mark.slow | |||||
| def test_concurrent_parse(get_http_api_auth, add_dataset_func, tmp_path): | |||||
| document_num = 50 | |||||
| dataset_id = add_dataset_func | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path) | |||||
| parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids}) | |||||
| with ThreadPoolExecutor(max_workers=5) as executor: | |||||
| futures = [ | |||||
| executor.submit( | |||||
| stop_parse_documnets, | |||||
| get_http_api_auth, | |||||
| dataset_id, | |||||
| {"document_ids": document_ids[i : i + 1]}, | |||||
| ) | |||||
| for i in range(document_num) | |||||
| ] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids) | 
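The refactor above replaces per-test `batch_create_datasets`/`bulk_upload_documents` setup with the `add_dataset_func`/`add_documents_func` fixtures. Only the fixture names appear in this diff; a plausible `conftest.py` wiring, sketched purely as an assumption, could look like:

```python
# Hypothetical conftest.py sketch; only the fixture names come from this diff.
import pytest
from common import batch_create_datasets, bulk_upload_documents


@pytest.fixture
def add_dataset_func(get_http_api_auth):
    # One fresh dataset per test function.
    return batch_create_datasets(get_http_api_auth, 1)[0]


@pytest.fixture
def add_documents_func(get_http_api_auth, add_dataset_func, tmp_path):
    # One fresh dataset plus a few uploaded documents per test function.
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
    return dataset_id, document_ids
```

Uploading three documents per test would also line up with the `success_count == 3` assertion in `test_duplicate_stop_parse` above.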
| import pytest | import pytest | ||||
| from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, batch_create_datasets, bulk_upload_documents, list_documnet, update_documnet | |||||
| from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, list_documnets, update_documnet | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_dataset_id_and_document_ids, auth, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| res = update_documnet(auth, dataset_id, document_ids[0], {"name": "auth_test.txt"}) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = update_documnet(auth, "dataset_id", "document_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| class TestUpdatedDocument: | |||||
| class TestDocumentsUpdated: | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "name, expected_code, expected_message", | "name, expected_code, expected_message", | ||||
| [ | [ | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_name(self, get_http_api_auth, get_dataset_id_and_document_ids, name, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| def test_name(self, get_http_api_auth, add_documents, name, expected_code, expected_message): | |||||
| dataset_id, document_ids = add_documents | |||||
| res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"name": name}) | res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"name": name}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| res = list_documnet(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| assert res["data"]["docs"][0]["name"] == name | assert res["data"]["docs"][0]["name"] == name | ||||
| else: | else: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_document_id(self, get_http_api_auth, get_dataset_id_and_document_ids, document_id, expected_code, expected_message): | |||||
| dataset_id, _ = get_dataset_id_and_document_ids | |||||
| def test_invalid_document_id(self, get_http_api_auth, add_documents, document_id, expected_code, expected_message): | |||||
| dataset_id, _ = add_documents | |||||
| res = update_documnet(get_http_api_auth, dataset_id, document_id, {"name": "new_name.txt"}) | res = update_documnet(get_http_api_auth, dataset_id, document_id, {"name": "new_name.txt"}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_dataset_id(self, get_http_api_auth, get_dataset_id_and_document_ids, dataset_id, expected_code, expected_message): | |||||
| _, document_ids = get_dataset_id_and_document_ids | |||||
| def test_invalid_dataset_id(self, get_http_api_auth, add_documents, dataset_id, expected_code, expected_message): | |||||
| _, document_ids = add_documents | |||||
| res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"name": "new_name.txt"}) | res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"name": "new_name.txt"}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| "meta_fields, expected_code, expected_message", | "meta_fields, expected_code, expected_message", | ||||
| [({"test": "test"}, 0, ""), ("test", 102, "meta_fields must be a dictionary")], | [({"test": "test"}, 0, ""), ("test", 102, "meta_fields must be a dictionary")], | ||||
| ) | ) | ||||
| def test_meta_fields(self, get_http_api_auth, get_dataset_id_and_document_ids, meta_fields, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| def test_meta_fields(self, get_http_api_auth, add_documents, meta_fields, expected_code, expected_message): | |||||
| dataset_id, document_ids = add_documents | |||||
| res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"meta_fields": meta_fields}) | res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"meta_fields": meta_fields}) | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| res = list_documnet(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| assert res["data"]["docs"][0]["meta_fields"] == meta_fields | assert res["data"]["docs"][0]["meta_fields"] == meta_fields | ||||
| else: | else: | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_chunk_method(self, get_http_api_auth, get_dataset_id_and_document_ids, chunk_method, expected_code, expected_message): | |||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| def test_chunk_method(self, get_http_api_auth, add_documents, chunk_method, expected_code, expected_message): | |||||
| dataset_id, document_ids = add_documents | |||||
| res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"chunk_method": chunk_method}) | res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"chunk_method": chunk_method}) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| if expected_code == 0: | if expected_code == 0: | ||||
| res = list_documnet(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| if chunk_method != "": | if chunk_method != "": | ||||
| assert res["data"]["docs"][0]["chunk_method"] == chunk_method | assert res["data"]["docs"][0]["chunk_method"] == chunk_method | ||||
| else: | else: | ||||
| def test_invalid_field( | def test_invalid_field( | ||||
| self, | self, | ||||
| get_http_api_auth, | get_http_api_auth, | ||||
| get_dataset_id_and_document_ids, | |||||
| add_documents, | |||||
| payload, | payload, | ||||
| expected_code, | expected_code, | ||||
| expected_message, | expected_message, | ||||
| ): | ): | ||||
| dataset_id, document_ids = get_dataset_id_and_document_ids | |||||
| dataset_id, document_ids = add_documents | |||||
| res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], payload) | res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], payload) | ||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| @pytest.mark.parametrize( | |||||
| "chunk_method, parser_config, expected_code, expected_message", | |||||
| [ | |||||
| ("naive", {}, 0, ""), | |||||
| ( | |||||
| "naive", | |||||
| { | |||||
| "chunk_token_num": 128, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "html4excel": False, | |||||
| "delimiter": "\\n!?;。;!?", | |||||
| "task_page_size": 12, | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, | |||||
| 0, | |||||
| "", | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": -1}, | |||||
| 100, | |||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": 0}, | |||||
| 100, | |||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": 100000000}, | |||||
| 100, | |||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": 3.14}, | |||||
| 102, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ( | |||||
| "naive", | |||||
| {"layout_recognize": "DeepDOC"}, | |||||
| 0, | |||||
| "", | |||||
| ), | |||||
| ( | |||||
| "naive", | |||||
| {"layout_recognize": "Naive"}, | |||||
| 0, | |||||
| "", | |||||
| ), | |||||
| ("naive", {"html4excel": True}, 0, ""), | |||||
| ("naive", {"html4excel": False}, 0, ""), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"html4excel": 1}, | |||||
| 100, | |||||
| "AssertionError('html4excel should be True or False')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ("naive", {"delimiter": ""}, 0, ""), | |||||
| ("naive", {"delimiter": "`##`"}, 0, ""), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"delimiter": 1}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": -1}, | |||||
| 100, | |||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": 0}, | |||||
| 100, | |||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": 100000000}, | |||||
| 100, | |||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ("naive", {"raptor": {"use_raptor": True}}, 0, ""), | |||||
| ("naive", {"raptor": {"use_raptor": False}}, 0, ""), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"invalid_key": "invalid_value"}, | |||||
| 100, | |||||
| """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_keywords": -1}, | |||||
| 100, | |||||
| "AssertionError('auto_keywords should be in range from 0 to 32')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_keywords": 32}, | |||||
| 100, | |||||
| "AssertionError('auto_keywords should be in range from 0 to 32')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_keywords": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": -1}, | |||||
| 100, | |||||
| "AssertionError('auto_questions should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": 10}, | |||||
| 100, | |||||
| "AssertionError('auto_questions should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": -1}, | |||||
| 100, | |||||
| "AssertionError('topn_tags should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": 10}, | |||||
| 100, | |||||
| "AssertionError('topn_tags should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ], | |||||
| ) | |||||
| def test_parser_config( | |||||
| get_http_api_auth, | |||||
| tmp_path, | |||||
| chunk_method, | |||||
| parser_config, | |||||
| expected_code, | |||||
| expected_message, | |||||
| ): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| document_ids = bulk_upload_documents(get_http_api_auth, ids[0], 1, tmp_path) | |||||
| res = update_documnet( | |||||
| get_http_api_auth, | |||||
| ids[0], | |||||
| document_ids[0], | |||||
| {"chunk_method": chunk_method, "parser_config": parser_config}, | |||||
| class TestUpdateDocumentParserConfig: | |||||
| @pytest.mark.parametrize( | |||||
| "chunk_method, parser_config, expected_code, expected_message", | |||||
| [ | |||||
| ("naive", {}, 0, ""), | |||||
| ( | |||||
| "naive", | |||||
| { | |||||
| "chunk_token_num": 128, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "html4excel": False, | |||||
| "delimiter": "\\n!?;。;!?", | |||||
| "task_page_size": 12, | |||||
| "raptor": {"use_raptor": False}, | |||||
| }, | |||||
| 0, | |||||
| "", | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": -1}, | |||||
| 100, | |||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": 0}, | |||||
| 100, | |||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": 100000000}, | |||||
| 100, | |||||
| "AssertionError('chunk_token_num should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": 3.14}, | |||||
| 102, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"chunk_token_num": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ( | |||||
| "naive", | |||||
| {"layout_recognize": "DeepDOC"}, | |||||
| 0, | |||||
| "", | |||||
| ), | |||||
| ( | |||||
| "naive", | |||||
| {"layout_recognize": "Naive"}, | |||||
| 0, | |||||
| "", | |||||
| ), | |||||
| ("naive", {"html4excel": True}, 0, ""), | |||||
| ("naive", {"html4excel": False}, 0, ""), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"html4excel": 1}, | |||||
| 100, | |||||
| "AssertionError('html4excel should be True or False')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ("naive", {"delimiter": ""}, 0, ""), | |||||
| ("naive", {"delimiter": "`##`"}, 0, ""), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"delimiter": 1}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": -1}, | |||||
| 100, | |||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": 0}, | |||||
| 100, | |||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": 100000000}, | |||||
| 100, | |||||
| "AssertionError('task_page_size should be in range from 1 to 100000000')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"task_page_size": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ("naive", {"raptor": {"use_raptor": True}}, 0, ""), | |||||
| ("naive", {"raptor": {"use_raptor": False}}, 0, ""), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"invalid_key": "invalid_value"}, | |||||
| 100, | |||||
| """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_keywords": -1}, | |||||
| 100, | |||||
| "AssertionError('auto_keywords should be in range from 0 to 32')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_keywords": 32}, | |||||
| 100, | |||||
| "AssertionError('auto_keywords should be in range from 0 to 32')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_keywords": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": -1}, | |||||
| 100, | |||||
| "AssertionError('auto_questions should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": 10}, | |||||
| 100, | |||||
| "AssertionError('auto_questions should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"auto_questions": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": -1}, | |||||
| 100, | |||||
| "AssertionError('topn_tags should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": 10}, | |||||
| 100, | |||||
| "AssertionError('topn_tags should be in range from 0 to 10')", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": 3.14}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| pytest.param( | |||||
| "naive", | |||||
| {"topn_tags": "1024"}, | |||||
| 100, | |||||
| "", | |||||
| marks=pytest.mark.skip(reason="issues/6098"), | |||||
| ), | |||||
| ], | |||||
| ) | ) | ||||
| assert res["code"] == expected_code | |||||
| if expected_code == 0: | |||||
| res = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]}) | |||||
| if parser_config != {}: | |||||
| for k, v in parser_config.items(): | |||||
| assert res["data"]["docs"][0]["parser_config"][k] == v | |||||
| else: | |||||
| assert res["data"]["docs"][0]["parser_config"] == { | |||||
| "chunk_token_num": 128, | |||||
| "delimiter": "\\n!?;。;!?", | |||||
| "html4excel": False, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "raptor": {"use_raptor": False}, | |||||
| } | |||||
| if expected_code != 0 or expected_message: | |||||
| assert res["message"] == expected_message | |||||
| def test_parser_config( | |||||
| self, | |||||
| get_http_api_auth, | |||||
| add_documents, | |||||
| chunk_method, | |||||
| parser_config, | |||||
| expected_code, | |||||
| expected_message, | |||||
| ): | |||||
| dataset_id, document_ids = add_documents | |||||
| res = update_documnet( | |||||
| get_http_api_auth, | |||||
| dataset_id, | |||||
| document_ids[0], | |||||
| {"chunk_method": chunk_method, "parser_config": parser_config}, | |||||
| ) | |||||
| assert res["code"] == expected_code | |||||
| if expected_code == 0: | |||||
| res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]}) | |||||
| if parser_config != {}: | |||||
| for k, v in parser_config.items(): | |||||
| assert res["data"]["docs"][0]["parser_config"][k] == v | |||||
| else: | |||||
| assert res["data"]["docs"][0]["parser_config"] == { | |||||
| "chunk_token_num": 128, | |||||
| "delimiter": "\\n!?;。;!?", | |||||
| "html4excel": False, | |||||
| "layout_recognize": "DeepDOC", | |||||
| "raptor": {"use_raptor": False}, | |||||
| } | |||||
| if expected_code != 0 or expected_message: | |||||
| assert res["message"] == expected_message | 
| import pytest | import pytest | ||||
| import requests | import requests | ||||
| from common import ( | |||||
| DOCUMENT_NAME_LIMIT, | |||||
| FILE_API_URL, | |||||
| HOST_ADDRESS, | |||||
| INVALID_API_TOKEN, | |||||
| batch_create_datasets, | |||||
| list_dataset, | |||||
| upload_documnets, | |||||
| ) | |||||
| from common import DOCUMENT_NAME_LIMIT, FILE_API_URL, HOST_ADDRESS, INVALID_API_TOKEN, list_datasets, upload_documnets | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| from libs.utils.file_utils import create_txt_file | from libs.utils.file_utils import create_txt_file | ||||
| from requests_toolbelt import MultipartEncoder | from requests_toolbelt import MultipartEncoder | ||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = upload_documnets(auth, ids[0]) | |||||
| def test_invalid_auth(self, auth, expected_code, expected_message): | |||||
| res = upload_documnets(auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| @pytest.mark.usefixtures("clear_datasets") | |||||
| class TestUploadDocuments: | |||||
| def test_valid_single_upload(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| class TestDocumentsUpload: | |||||
| def test_valid_single_upload(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"][0]["dataset_id"] == ids[0] | |||||
| assert res["data"][0]["dataset_id"] == dataset_id | |||||
| assert res["data"][0]["name"] == fp.name | assert res["data"][0]["name"] == fp.name | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| ], | ], | ||||
| indirect=True, | indirect=True, | ||||
| ) | ) | ||||
| def test_file_type_validation(self, get_http_api_auth, generate_test_files, request): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_file_type_validation(self, get_http_api_auth, add_dataset_func, generate_test_files, request): | |||||
| dataset_id = add_dataset_func | |||||
| fp = generate_test_files[request.node.callspec.params["generate_test_files"]] | fp = generate_test_files[request.node.callspec.params["generate_test_files"]] | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"][0]["dataset_id"] == ids[0] | |||||
| assert res["data"][0]["dataset_id"] == dataset_id | |||||
| assert res["data"][0]["name"] == fp.name | assert res["data"][0]["name"] == fp.name | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "file_type", | "file_type", | ||||
| ["exe", "unknown"], | ["exe", "unknown"], | ||||
| ) | ) | ||||
| def test_unsupported_file_type(self, get_http_api_auth, tmp_path, file_type): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_unsupported_file_type(self, get_http_api_auth, add_dataset_func, tmp_path, file_type): | |||||
| dataset_id = add_dataset_func | |||||
| fp = tmp_path / f"ragflow_test.{file_type}" | fp = tmp_path / f"ragflow_test.{file_type}" | ||||
| fp.touch() | fp.touch() | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 500 | assert res["code"] == 500 | ||||
| assert res["message"] == f"ragflow_test.{file_type}: This type of file has not been supported yet!" | assert res["message"] == f"ragflow_test.{file_type}: This type of file has not been supported yet!" | ||||
| def test_missing_file(self, get_http_api_auth): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| res = upload_documnets(get_http_api_auth, ids[0]) | |||||
| def test_missing_file(self, get_http_api_auth, add_dataset_func): | |||||
| dataset_id = add_dataset_func | |||||
| res = upload_documnets(get_http_api_auth, dataset_id) | |||||
| assert res["code"] == 101 | assert res["code"] == 101 | ||||
| assert res["message"] == "No file part!" | assert res["message"] == "No file part!" | ||||
| def test_empty_file(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_empty_file(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| fp = tmp_path / "empty.txt" | fp = tmp_path / "empty.txt" | ||||
| fp.touch() | fp.touch() | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"][0]["size"] == 0 | assert res["data"][0]["size"] == 0 | ||||
| def test_filename_empty(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_filename_empty(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=ids[0]) | |||||
| url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id) | |||||
| fields = (("file", ("", fp.open("rb"))),) | fields = (("file", ("", fp.open("rb"))),) | ||||
| m = MultipartEncoder(fields=fields) | m = MultipartEncoder(fields=fields) | ||||
| res = requests.post( | res = requests.post( | ||||
| assert res.json()["code"] == 101 | assert res.json()["code"] == 101 | ||||
| assert res.json()["message"] == "No file selected!" | assert res.json()["message"] == "No file selected!" | ||||
| def test_filename_exceeds_max_length(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_filename_exceeds_max_length(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| # filename_length = 129 | # filename_length = 129 | ||||
| fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") | fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 101 | assert res["code"] == 101 | ||||
| assert res["message"] == "File name should be less than 128 bytes." | assert res["message"] == "File name should be less than 128 bytes." | ||||
| assert res["code"] == 100 | assert res["code"] == 100 | ||||
| assert res["message"] == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")""" | assert res["message"] == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")""" | ||||
| def test_duplicate_files(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_duplicate_files(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp, fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp, fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 2 | assert len(res["data"]) == 2 | ||||
| for i in range(len(res["data"])): | for i in range(len(res["data"])): | ||||
| assert res["data"][i]["dataset_id"] == ids[0] | |||||
| assert res["data"][i]["dataset_id"] == dataset_id | |||||
| expected_name = fp.name | expected_name = fp.name | ||||
| if i != 0: | if i != 0: | ||||
| expected_name = f"{fp.stem}({i}){fp.suffix}" | expected_name = f"{fp.stem}({i}){fp.suffix}" | ||||
| assert res["data"][i]["name"] == expected_name | assert res["data"][i]["name"] == expected_name | ||||
| def test_same_file_repeat(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_same_file_repeat(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| for i in range(10): | for i in range(10): | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 1 | assert len(res["data"]) == 1 | ||||
| assert res["data"][0]["dataset_id"] == ids[0] | |||||
| assert res["data"][0]["dataset_id"] == dataset_id | |||||
| expected_name = fp.name | expected_name = fp.name | ||||
| if i != 0: | if i != 0: | ||||
| expected_name = f"{fp.stem}({i}){fp.suffix}" | expected_name = f"{fp.stem}({i}){fp.suffix}" | ||||
| assert res["data"][0]["name"] == expected_name | assert res["data"][0]["name"] == expected_name | ||||
| def test_filename_special_characters(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_filename_special_characters(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| illegal_chars = '<>:"/\\|?*' | illegal_chars = '<>:"/\\|?*' | ||||
| translation_table = str.maketrans({char: "_" for char in illegal_chars}) | translation_table = str.maketrans({char: "_" for char in illegal_chars}) | ||||
| safe_filename = string.punctuation.translate(translation_table) | safe_filename = string.punctuation.translate(translation_table) | ||||
| fp = tmp_path / f"{safe_filename}.txt" | fp = tmp_path / f"{safe_filename}.txt" | ||||
| fp.write_text("Sample text content") | fp.write_text("Sample text content") | ||||
| res = upload_documnets(get_http_api_auth, ids[0], [fp]) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 1 | assert len(res["data"]) == 1 | ||||
| assert res["data"][0]["dataset_id"] == ids[0] | |||||
| assert res["data"][0]["dataset_id"] == dataset_id | |||||
| assert res["data"][0]["name"] == fp.name | assert res["data"][0]["name"] == fp.name | ||||
| def test_multiple_files(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_multiple_files(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| expected_document_count = 20 | expected_document_count = 20 | ||||
| fps = [] | fps = [] | ||||
| for i in range(expected_document_count): | for i in range(expected_document_count): | ||||
| fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt") | fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt") | ||||
| fps.append(fp) | fps.append(fp) | ||||
| res = upload_documnets(get_http_api_auth, ids[0], fps) | |||||
| res = upload_documnets(get_http_api_auth, dataset_id, fps) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["document_count"] == expected_document_count | assert res["data"][0]["document_count"] == expected_document_count | ||||
| def test_concurrent_upload(self, get_http_api_auth, tmp_path): | |||||
| ids = batch_create_datasets(get_http_api_auth, 1) | |||||
| def test_concurrent_upload(self, get_http_api_auth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | |||||
| expected_document_count = 20 | expected_document_count = 20 | ||||
| fps = [] | fps = [] | ||||
| fps.append(fp) | fps.append(fp) | ||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [executor.submit(upload_documnets, get_http_api_auth, ids[0], fps[i : i + 1]) for i in range(expected_document_count)] | |||||
| futures = [executor.submit(upload_documnets, get_http_api_auth, dataset_id, fps[i : i + 1]) for i in range(expected_document_count)] | |||||
| responses = [f.result() for f in futures] | responses = [f.result() for f in futures] | ||||
| assert all(r["code"] == 0 for r in responses) | assert all(r["code"] == 0 for r in responses) | ||||
| res = list_dataset(get_http_api_auth, {"id": ids[0]}) | |||||
| res = list_datasets(get_http_api_auth, {"id": dataset_id}) | |||||
| assert res["data"][0]["document_count"] == expected_document_count | assert res["data"][0]["document_count"] == expected_document_count |